/* Output routines for GCC for ARM.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
28 #include "coretypes.h"
38 #include "stringpool.h"
45 #include "diagnostic-core.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
52 #include "insn-attr.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
65 #include "target-globals.h"
67 #include "tm-constrs.h"
69 #include "optabs-libfuncs.h"
73 #include "tree-vectorizer.h"
76 /* This file should be included last. */
77 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static void arm_emit_multi_reg_pop (unsigned long);
static int vfp_emit_fstmd (int, int);
static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *, struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
					  rtx, const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int *);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
				       vec<machine_mode> &,
				       vec<const char *> &, vec<rtx> &,
				       HARD_REG_SET &, location_t);
static const char *arm_identify_fpu_from_isa (sbitmap);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, false, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
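/* Spelled out (an illustrative note, not in the original source): anchor
   offsets run from -4088 to +4095 inclusive, i.e. 4088 + 1 + 4095 = 8184
   byte positions, and 8184 = 8 * 1023, hence the divisibility by eight.  */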
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
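/* Illustrative only: under -mrestrict-it an IT block predicates a single
   instruction,

	it	eq
	moveq	r0, #1

   whereas otherwise up to four conditional instructions can share one
   block, e.g. "ittee eq" covers two EQ insns followed by two NE insns.  */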
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef  TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision
/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
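/* In other words (illustrative note): a function-descriptor address is
   tagged by setting bit 1 (hence the value 2), which cannot be confused
   with bit 0, the ARM/Thumb interworking bit.  */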
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;
/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   extensions.  */
int arm_arch8_1m_main = 0;
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* Nonzero if chip supports the Custom Datapath Extension.  */
int arm_arch_cde = 0;
int arm_arch_cde_coproc = 0;
const int arm_arch_cde_coproc_bits[] = {
  0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
};
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] = {
  FP_SYSREGS
};
#undef DEF_FP_SYSREG
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)
#define THUMB2_WORK_REGS \
  (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
	    | (1 << SP_REGNUM) \
	    | (1 << PC_REGNUM) \
	    | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
	       ? (1 << PIC_OFFSET_TABLE_REGNUM) \
	       : 0)))
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
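/* Illustrative reading (not from the source): with these unit weights, a
   loop body vectorized into one unaligned vector load, one vector
   statement and one vector store is costed 1 + 1 + 1 = 3, the same as the
   three scalar statements it replaces, so generic profitability comes
   from processing several elements per iteration rather than from
   per-statement weighting; only taken branches (cost 3) are penalized.  */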
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
    0,			/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (4),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (4),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    COSTS_N_INSNS (2),	/* load_sign_extend.  */
    COSTS_N_INSNS (2),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* loadf.  */
    COSTS_N_INSNS (5),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (2),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    COSTS_N_INSNS (1),	/* shift.  */
    0,			/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    0,			/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    0,			/* log_shift_reg.  */
    0,			/* extend.  */
    0,			/* extend_arith.  */
    0,			/* bfi.  */
    0,			/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (1),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* loadf.  */
    COSTS_N_INSNS (1),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (1),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36),	/* div.  */
      COSTS_N_INSNS (11),	/* mult.  */
      COSTS_N_INSNS (20),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64),	/* div.  */
      COSTS_N_INSNS (16),	/* mult.  */
      COSTS_N_INSNS (25),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (6),	/* widen.  */
      COSTS_N_INSNS (6),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      0,			/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (6),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      0,			/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (2),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    0,			/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    0,			/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (3),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    0,			/* load_unaligned.  */
    0,			/* store.  */
    0,			/* strd.  */
    0,			/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    0,			/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    0,			/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    0,			/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (4),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    0,			/* load_unaligned.  */
    0,			/* store.  */
    0,			/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    0,			/* storef.  */
    0,			/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (5),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    0,			/* shift_reg.  */
    0,			/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    0,			/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    0,			/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    0,			/* bfi.  */
    0,			/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      COSTS_N_INSNS (8)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    0,			/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    1,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    1,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (3),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),	/* div.  */
      COSTS_N_INSNS (2),	/* mult.  */
      COSTS_N_INSNS (5),	/* mult_addsub.  */
      COSTS_N_INSNS (3),	/* fma.  */
      COSTS_N_INSNS (1),	/* addsub.  */
      0,			/* fpconst.  */
      0,			/* neg.  */
      0,			/* compare.  */
      0,			/* widen.  */
      0,			/* narrow.  */
      0,			/* toint.  */
      0,			/* fromint.  */
      0				/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      0,			/* fpconst.  */
      0,			/* neg.  */
      0,			/* compare.  */
      0,			/* widen.  */
      0,			/* narrow.  */
      0,			/* toint.  */
      0,			/* fromint.  */
      0				/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  3,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,                          /* Constant limit.  */
  3,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,      /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,      /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,     /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  2,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  3,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,     /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,     /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,     /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  2,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  3,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,      /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  2,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  3,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,        /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  2,                          /* Max cond insns.  */
  32,                         /* Memset max inline.  */
  4,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,      /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  1,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,      /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,     /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,      /* Vectorizer costs.  */
  1,                          /* Constant limit.  */
  2,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,     /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,      /* Vectorizer costs.  */
  1,                          /* Constant limit.  */
  2,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,           /* Insn extra costs (restored from context:
				 the table defined above).  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  2,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,           /* Insn extra costs (restored from context:
				 the table defined above).  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,                          /* Constant limit.  */
  1,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  NULL,                       /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,      /* Vectorizer costs.  */
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  1,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,       /* Insn extra costs.  */
  &generic_addr_mode_costs,   /* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                          /* Constant limit.  */
  5,                          /* Max cond insns.  */
  8,                          /* Memset max inline.  */
  2,                          /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Supported TLS relocations.  The enumerators other than TLS_DESCSEQ were
   lost in extraction and are restored here from the usual set.  */

enum tls_reloc {
  TLS_GD32,
  TLS_GD32_FDPIC,
  TLS_LDM32,
  TLS_LDM32_FDPIC,
  TLS_LDO32,
  TLS_IE32,
  TLS_IE32_FDPIC,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
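
/* Worked trace of the loop above (illustrative comment only):
   bit_count (0x29) clears one set bit per iteration,
   0x29 -> 0x28 -> 0x20 -> 0x0, and therefore returns 3.  */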
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
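
/* For example, the call
     arm_set_fixed_optab_libfunc (add_optab, E_SAmode, "add", "sa", 3);
   registers the name "__gnu_addsa3" for SAmode addition, matching the
   __gnu_-prefixed fixed-point helpers in libgcc.  (Worked example only;
   the real calls are made in arm_init_libfuncs below.)  */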
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
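
/* For example, a conversion between SQmode and HQmode (two signed fract
   modes) selects the "2" suffix and registers "__gnu_fractsqhq2", while
   a conversion involving a non-fixed-point mode gets no suffix.  (Worked
   example only, following the fixed-bit.h naming rule above.)  */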
static GTY(()) rtx speculation_barrier_libfunc;
/* Record that we have no arithmetic or comparison libfuncs for
   machine mode MODE.  */

static void
arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
{
  /* Arithmetic.  */
  set_optab_libfunc (add_optab, mode, NULL);
  set_optab_libfunc (sdiv_optab, mode, NULL);
  set_optab_libfunc (smul_optab, mode, NULL);
  set_optab_libfunc (neg_optab, mode, NULL);
  set_optab_libfunc (sub_optab, mode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, mode, NULL);
  set_optab_libfunc (ne_optab, mode, NULL);
  set_optab_libfunc (lt_optab, mode, NULL);
  set_optab_libfunc (le_optab, mode, NULL);
  set_optab_libfunc (ge_optab, mode, NULL);
  set_optab_libfunc (gt_optab, mode, NULL);
  set_optab_libfunc (unord_optab, mode, NULL);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  machine_mode mode_iter;

  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
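
  /* Effect in practice: for `r = a % b' on SImode operands GCC calls
     __aeabi_idivmod and reads the remainder from r1, simply ignoring
     the quotient in r0.  (Illustrative note; register layout per the
     RTABI.)  */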
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      arm_block_arith_comp_libfuncs_for_mode (HFmode);
      break;

    default:
      break;
    }
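
  /* For instance, with -mfp16-format=ieee a conversion `(__fp16) f' from
     float calls __gnu_f2h_ieee when no hardware instruction is available;
     the "_alternative" entry points serve the legacy VFP half-precision
     format.  (Illustrative note.)  */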
  /* For all possible libcalls in BFmode, record NULL.  */
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
    {
      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
    }
  arm_block_arith_comp_libfuncs_for_mode (BFmode);
  /* Use names prefixed with __gnu_ for fixed-point helper functions.
     (The signed-mode entries of these tables were lost in extraction;
     only the unsigned entries survive.)  */
  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      { E_UQQmode, "uqq" },
      { E_UHQmode, "uhq" },
      { E_USQmode, "usq" },
      { E_UDQmode, "udq" },
      { E_UTQmode, "utq" },
      { E_UHAmode, "uha" },
      { E_USAmode, "usa" },
      { E_UDAmode, "uda" },
      { E_UTAmode, "uta" }
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      { E_UQQmode, "uqq" },
      { E_UHQmode, "uhq" },
      { E_USQmode, "usq" },
      { E_UDQmode, "udq" },
      { E_UTQmode, "utq" },
      { E_UHAmode, "uha" },
      { E_USAmode, "usa" },
      { E_UDAmode, "uda" },
      { E_UTAmode, "uta" },
    };
  unsigned int i, j;
  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S 4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling "
	     "for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when "
	     "compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code"
			  : "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
	 MOVT.  */
      if (target_slow_flash_data
	  && (!TARGET_HAVE_MOVT || common_unsupported_modes))
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets.  */
      if (target_pure_code && common_unsupported_modes)
	error ("%s only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }
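
  /* Worked check of the Thumb-2 comment above: 248 + 1 + 4095 = 4344
     bytes per anchor block, and 4344 = 8 * 543, so blocks are indeed a
     multiple of eight bytes.  (Illustrative note.)  */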
  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;
static void
arm_override_options_after_change_1 (struct gcc_options *opts,
				     struct gcc_options *opts_set)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}
/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options * /* opts */,
		    struct gcc_options * /* opts_set */,
		    struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, false);
  arm_option_reconfigure_globals ();
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts, opts_set);

  if (TARGET_INTERWORK
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 eg, -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial values so that combinations of options
     like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit
       ISA, i.e. Thumb2 and ARM state only;
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors;
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors;
     - ARMv6-M architecture-based processors;
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6
				  && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

  if (arm_stack_protector_guard == SSP_GLOBAL
      && opts->x_arm_stack_protector_guard_offset_str)
    {
      error ("incompatible options %<-mstack-protector-guard=global%> and "
	     "%<-mstack-protector-guard-offset=%s%>",
	     arm_stack_protector_guard_offset_str);
    }

  if (opts->x_arm_stack_protector_guard_offset_str)
    {
      char *end;
      const char *str = arm_stack_protector_guard_offset_str;
      errno = 0;
      long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
      if (!*str || *end || errno)
	error ("%qs is not a valid offset in %qs", str,
	       "-mstack-protector-guard-offset=");
      arm_stack_protector_guard_offset = offs;
    }
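
  /* Note that strtol is called with base 0 above, so the offset accepts
     decimal, octal ("010") and hexadecimal ("0x8") spellings alike.
     (Illustrative note.)  */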
#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}

static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;
/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }
  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* If the user (or the default configuration) has specified a
	     specific FPU, then ignore any bits that depend on the FPU
	     configuration.  Do similarly if using the soft-float
	     ABI.  */
	  if (opts->x_arm_fpu_index != TARGET_FPU_auto
	      || arm_float_abi == ARM_FLOAT_ABI_SOFT)
	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with switch %<-march=%s%>",
			 opts->x_arm_cpu_string,
			 opts->x_arm_arch_string);

	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
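
  /* E.g. `-mcpu=cortex-m4 -march=armv7-a' leaves a non-empty isa_delta
     and triggers the warning above; -march then drives code generation
     while -mcpu supplies only the default tuning.  (Illustrative
     note.)  */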
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      /* This should clear out ALL bits relating to the FPU/simd
	 extensions, to avoid potentially invalid combinations later on
	 that we can't match.  At present we only clear out those bits
	 that can be set by -mfpu.  This should be fixed in GCC-12.  */
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  /* If we have the soft-float ABI, clear any feature bits relating to use of
     floating-point operations.  They'll just confuse things later on.  */
  if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
    bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
  /* There may be implied bits which we still need to enable.  These are
     non-named features which are needed to complete other sets of features,
     but cannot be enabled from arm-cpus.in due to being shared between
     multiple fgroups.  Each entry in all_implied_fbits is of the form
     ante -> cons, meaning that if the feature "ante" is enabled, we should
     implicitly enable "cons".  */
  const struct fbit_implication *impl = all_implied_fbits;
  while (impl->ante)
    {
      if (bitmap_bit_p (target->isa, impl->ante))
	bitmap_set_bit (target->isa, impl->cons);
      impl++;
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  if (!target->arch_name)
    target->arch_name = arm_selected_arch->common.name;
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!OPTION_SET_P (arm_fpu_index))
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options, &global_options_set);
  arm_configure_build_target (&arm_active_target, &opts, true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;
  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!OPTION_SET_P (arm_structure_size_boundary))
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (TARGET_VXWORKS_RTP)
    {
      if (!OPTION_SET_P (arm_pic_data_is_text_relative))
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  if (TARGET_FDPIC)
    target_word_relocations = 1;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable fix_vlldm by default if required.  */
  if (fix_vlldm == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
	fix_vlldm = 1;
      else
	fix_vlldm = 0;
    }

  /* Enable fix_aes by default if required.  */
  if (fix_aes_erratum_1742098 == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
	fix_aes_erratum_1742098 = 1;
      else
	fix_aes_erratum_1742098 = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_gcse_unrestricted_cost, 2);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_simultaneous_prefetches,
			 current_tune->prefetch.num_slots);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_line_size,
			 current_tune->prefetch.l1_cache_line_size);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    {
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
    }
  else
    {
      /* For a generic ARM target, JF Bastien proposed using 64 for both.  */
      /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
	 constructive?  */
      /* More recent Cortex chips have a 64-byte cache line, but are marked
	 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size, 64);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size, 64);
    }
  if (current_tune->prefetch.l1_cache_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_size,
			 current_tune->prefetch.l1_cache_size);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_sched_autopref_queue_depth,
		       sched_autopref_queue_depth);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options,
					   &global_options_set);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options, &global_options_set);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
				    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }
  arm_arch_cde = 0;
  arm_arch_cde_coproc = 0;
  int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
		    isa_bit_cdecp6, isa_bit_cdecp7};
  for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
    {
      int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
      if (cde_bit)
	{
	  arm_arch_cde |= cde_bit;
	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
	}
    }
  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
    error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
}
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("%<__fp16%> and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
	    error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
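/* Editorial note (not part of the original source): the strings above
   are the arguments accepted by the ARM "interrupt" function
   attribute in user code, e.g.

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   With no argument the attribute defaults to IRQ handling, as
   arm_isr_value below implements.  */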
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Set up the static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  maybe_emit_call_builtin___clear_cache (a_tramp,
					 plus_constant (ptr_mode,
							a_tramp,
							TRAMPOLINE_SIZE));
}
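/* Editorial illustration (not part of the original source): a
   trampoline is materialized by the routines above when the address
   of a nested function (a GNU C extension) escapes, because the
   static chain pointer must travel with the function pointer.  A
   minimal trigger, kept under `#if 0' so it is never compiled:  */
#if 0
void
apply_bias (int *a, int n, int bias)
{
  int add_bias (int x) { return x + bias; }	/* Nested function.  */
  int (*fp) (int) = add_bias;	/* Address escapes: trampoline needed.  */
  for (int i = 0; i < n; i++)
    a[i] = fp (a[i]);
}
#endif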
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if REG needs to be saved.  For interrupt handlers, this
   includes call-clobbered registers too.  If this is a leaf function
   we can just examine the registers used by the RTL, but otherwise we
   have to assume that whatever function is called might clobber
   anything, and so we have to save all the call-clobbered registers
   as well.  */
static inline bool reg_needs_saving_p (unsigned reg)
{
  unsigned long func_type = arm_current_func_type ();

  if (IS_INTERRUPT (func_type))
    if (df_regs_ever_live_p (reg)
	/* Save call-clobbered core registers.  */
	|| (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)
	    && reg < FIRST_VFP_REGNUM))
      return true;
    else
      return false;
  else
    if (!df_regs_ever_live_p (reg)
	|| call_used_or_fixed_reg_p (reg))
      return false;
    else
      return true;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  Armv8.1-M Mainline
     also needs several instructions to save and restore FP context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_VFP_BASE)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  return 1;
}
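/* Editorial note (not in the original source): when use_return_insn
   returns nonzero the whole epilogue folds into one instruction, for
   example

       ldmfd	sp!, {r4, r5, pc}

   whereas returning without a saved LR, or with a stack adjustment
   that cannot fold into the load-multiple, needs at least two
   instructions, so the single-insn return pattern must not be used.  */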
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
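/* Editorial illustration (not part of the original source): an ARM
   data-processing immediate is an 8-bit value rotated right by an
   even amount, so 0x000000ff, 0x0000ff00 and 0xc000003f are valid
   while 0x00000101 and 0x000001fe are not.  A minimal standalone
   checker equivalent to the ARM-mode test above, assuming 32-bit
   unsigned arithmetic; kept under `#if 0' so it is never compiled.  */
#if 0
#include <stdint.h>

static int
arm_mode_immediate_p (uint32_t x)
{
  /* Try every even rotate-right of an 8-bit field.  */
  for (int rot = 0; rot < 32; rot += 2)
    {
      uint32_t mask = (0xffu >> rot)
		      | (rot ? (uint32_t) 0xff << (32 - rot) : 0);
      if ((x & ~mask) == 0)
	return 1;
    }
  return 0;
}
#endif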
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	  || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
					 best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 only require one insn.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
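/* Editorial note (not in the original source): the Thumb-2 replicated
   forms tested above correspond to the modified-immediate encodings
   0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY.  So constants such as
   0x00120012, 0x12001200 and 0x12121212 each load in a single Thumb-2
   instruction, while ARM mode would need two instructions (or a
   literal-pool load) for the same values.  */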
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode,
							    GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source,
								   sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12

	  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
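/* Editorial note (not in the original source): as the 99-insn
   sentinel above enforces, AND is never split into multiple insns;
   its constant is inverted and BIC is used instead.  For example

       and	r0, r0, #0xffffff00

   is not encodable (0xffffff00 is not a rotated 8-bit immediate), but
   the inverted form is a single instruction:

       bic	r0, r0, #0xff
*/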
/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.  */
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
     ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
     either reversed or (for constant OP1) adjusted to GE/LT.
     Similarly for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || *code == GTU || *code == LEU)
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval)
		    {
		      /* Try to convert to GE/LT, unless that would be more
			 expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		    }
		  else
		    {
		      /* GT maxval is always false, LE maxval is always true.
			 We can't fold that away here as we must make a
			 comparison, but we can fold them to comparisons
			 with the same result that can be handled:
			   op0 GT maxval -> op0 LT minval
			   op0 LE maxval -> op0 GE minval
			 where minval = (-maxval - 1).  */
		      *op1 = GEN_INT (-maxval - 1);
		      *code = *code == GT ? LT : GE;
		    }
		  return;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0))
		    {
		      /* Try to convert to GEU/LTU, unless that would
			 be more expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		    }
		  else
		    {
		      /* GTU ~0 is always false, LEU ~0 is always true.
			 We can't fold that away here as we must make a
			 comparison, but we can fold them to comparisons
			 with the same result that can be handled:
			   op0 GTU ~0 -> op0 LTU 0
			   op0 LEU ~0 -> op0 GEU 0.  */
		      *op1 = const0_rtx;
		      *code = *code == GTU ? LTU : GEU;
		    }
		  return;

		default:
		  gcc_unreachable ();
		}
	    }

	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
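/* Illustrative example of the sub-DImode path above: "x > 0xfff" compares
   against 0xfff, which is not encodable as an ARM immediate, but 0x1000
   is, so the comparison is rewritten as "x >= 0x1000" (GT -> GE),
   avoiding a separate constant load.  */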
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
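/* Illustrative example of the MSB promotion above: on big-endian AAPCS a
   6-byte struct returned in registers is padded up to 8 bytes and
   returned in DImode, so its significant bytes land in the
   most-significant end of the r0/r1 pair.  */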
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers except in case of MVE, because in
	 MVE we will be using the hard-float ABI on a CPU which doesn't support
	 single-precision operations in hardware.  In MVE the following check
	 enables use of emulation for the single-precision arithmetic
	 operations.  */
      if (TARGET_HAVE_MVE)
	{
	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
	}
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
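/* Example use (illustrative): arm_libcall_value below consults this table
   so that e.g. the SImode->DFmode conversion helper (__aeabi_i2d under
   the AEABI run-time naming) returns its result in core registers even
   under a hard-float ABI.  */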
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      /* NOTE: This code is deprecated and has not been updated to handle
	 DECL_FIELD_ABI_IGNORED.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized interrupt type.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
#if 0
      /* Unfortunately, this is not safe and can lead to wrong code
	 being generated (PR96882).  Not all calls into the back-end
	 pass the DECL, so it is unsafe to make any PCS-changing
	 decisions based on it.  In particular the RETURN_IN_MEMORY
	 hook is only ever passed a TYPE.  This needs revisiting to
	 see if there are any partial improvements that can be
	 re-enabled.  */
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_node *local_info_node
	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
	  if (local_info_node && local_info_node->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
#endif
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		    const_tree fntype  ATTRIBUTE_UNUSED,
		    rtx libcall  ATTRIBUTE_UNUSED,
		    const_tree fndecl  ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Bitmasks that indicate whether earlier versions of GCC would have
   taken a different path through the ABI logic.  This should result in
   a -Wpsabi warning if the earlier path led to a different ABI decision.

   WARN_PSABI_EMPTY_CXX17_BASE
      Indicates that the type includes an artificial empty C++17 base field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  See PR94711 for details.

   WARN_PSABI_NO_UNIQUE_ADDRESS
      Indicates that the type includes an empty [[no_unique_address]] field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  */
const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.

   The WARN_PSABI_FLAGS argument allows the caller to check whether this
   function has changed its behavior relative to earlier versions of GCC.
   Normally the argument should be nonnull and point to a zero-initialized
   variable.  The function then records whether the ABI decision might
   be affected by a known fix to the ABI logic, setting the associated
   WARN_PSABI_* bits if so.

   When the argument is instead a null pointer, the function tries to
   simulate the behavior of GCC before all such ABI fixes were made.
   This is useful to check whether the function returns something
   different after the ABI fixes.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
			 unsigned int *warn_psabi_flags)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
					 warn_psabi_flags);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    if (DECL_FIELD_ABI_IGNORED (field))
	      {
		/* See whether this is something that earlier versions of
		   GCC failed to ignore.  */
		unsigned int flag;
		if (lookup_attribute ("no_unique_address",
				      DECL_ATTRIBUTES (field)))
		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
		else if (cxx17_empty_base_field_p (field))
		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
		else
		  /* No compatibility problem.  */
		  continue;

		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= flag;
		    continue;
		  }
	      }
	    /* A zero-width bitfield may affect layout in some
	       circumstances, but adds no members.  The determination
	       of whether or not a type is an HFA is performed after
	       layout is complete, so if the type still looks like an
	       HFA afterwards, it is still classed as one.  This is
	       potentially an ABI break for the hard-float ABI.  */
	    else if (DECL_BIT_FIELD (field)
		     && integer_zerop (DECL_SIZE (field)))
	      {
		/* Prior to GCC-12 these fields were stripped early,
		   hiding them from the back-end entirely and
		   resulting in the correct behaviour for argument
		   passing.  Simulate that old behaviour without
		   generating a warning.  */
		if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
		  continue;
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
		    continue;
		  }
	      }

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 %<hard-float%> VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      unsigned int warn_psabi_flags = 0;
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
					      &warn_psabi_flags);
      if (ag_count > 0 && ag_count <= 4)
	{
	  static unsigned last_reported_type_uid;
	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
	  int alt;
	  if (warn_psabi
	      && warn_psabi_flags
	      && uid != last_reported_type_uid
	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
		  != ag_count))
	    {
	      const char *url10
		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
	      const char *url12
		= CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
	      gcc_assert (alt == -1);
	      last_reported_type_uid = uid;
	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
		 qualification.  */
	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
		inform (input_location, "parameter passing for argument of "
			"type %qT with %<[[no_unique_address]]%> members "
			"changed %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
		inform (input_location, "parameter passing for argument of "
			"type %qT when C++17 is enabled changed to match "
			"C++14 %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
		inform (input_location, "parameter passing for argument of "
			"type %qT changed %{in GCC 12.1%}",
			TYPE_MAIN_VARIANT (type), url12);
	    }
	  *count = ag_count;
	}
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
	   type);

  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type  ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
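/* Worked example (illustrative): for a homogeneous aggregate of two
   doubles, aapcs_vfp_rmode is DFmode and aapcs_vfp_rcount is 2, giving
   shift = 2 and mask = 0xf; the loop then looks for four consecutive
   free S registers (e.g. s0-s3, i.e. d0-d1) and marks them allocated.  */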
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!(TARGET_NEON || TARGET_HAVE_MVE))
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		   machine_mode mode  ATTRIBUTE_UNUSED,
		   const_tree type  ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
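/* Worked example (illustrative) of rules C3-C4: for f(int a, long long b),
   "a" takes r0 and leaves ncrn == 1; for "b", rule C3 rounds ncrn up to
   the even value 2, so "b" occupies the pair r2/r3 and aapcs_next_ncrn
   becomes 4.  */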
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.

     Note that we explicitly consider zero-sized fields here, even though
     they don't map to AAPCS machine types.  For example, in:

	 struct __attribute__((aligned(8))) empty {};

	 struct s {
	   [[no_unique_address]] empty e;
	   int x;
	 };

     "s" contains only one Fundamental Data Type (the int field)
     but gains 8-byte alignment and size thanks to "e".  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 2;

  if (ret2)
    return 2;

  return ret;
}
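/* Illustrative example: "struct s { long long x; }" has an 8-byte aligned
   member, so this returns 1 and the argument starts on an even register
   (or an 8-byte aligned stack slot); "struct t { int x; }" returns 0.  */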
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
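/* Worked example (illustrative): with r0/r1 already taken (nregs == 2),
   a 12-byte struct needs three words but only r2/r3 remain, so the
   function returns 8: two words go in registers and the final word is
   passed on the stack.  */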
/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;

      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
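/* Example usage (illustrative), as seen from user code:

       #pragma long_calls
       void far_away (void);   (declared here: receives "long_call")
       #pragma long_calls_off

   Declarations made while a pragma is active pick up the corresponding
   attribute via arm_set_default_type_attributes below.  */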
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      else if (TARGET_VFP_BASE)
	{
	  warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
		   name);
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE;
  tree fntype, type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  if (DECL_P (*node))
    {
      fntype = TREE_TYPE (*node);

      if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
	decl = *node;
    }
  else
    fntype = *node;

  while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  if (decl)
    {
      type = build_distinct_type_copy (TREE_TYPE (decl));
      TREE_TYPE (decl) = type;
    }
  else
    {
      type = build_distinct_type_copy (*node);
      *node = type;
    }

  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  tree attrs1 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type1));
  tree attrs2 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type2));
  if (bool (attrs1) != bool (attrs2))
    return 0;
  if (attrs1 && !attribute_value_equal (attrs1, attrs2))
    return 0;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* In FDPIC, never tailcall something for which we have no decl:
     the target function could be in a different module, requiring
     a different FDPIC register value.  */
  if (TARGET_FDPIC && !decl)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers.  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
        return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
        decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
        {
          tree type = TREE_VALUE (t);
          if (!VOID_TYPE_P (type))
            {
              function_arg_info arg (type, /*named=*/true);
              arm_function_arg_advance (cum_v, arg);
            }
        }

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
        return false;
    }

  /* Everything else is ok.  */
  return true;
}
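
/* For orientation (illustrative, not from the sources): given

     int callee (int);
     int caller (int x) { return callee (x + 1); }

   the call is emitted as a sibling call ("b callee" instead of
   "bl callee" plus a full epilogue) only when every veto above is
   clear; otherwise expansion falls back to a normal call.  */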
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (SYMBOL_REF_P (x)
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
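
/* For example (illustrative): (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 4))) are rejected here and
   must be legitimized through the GOT, whereas e.g. plain CONST_INT
   values remain valid PIC immediates.  */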
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already
   done so.  COMPUTE_NOW decides whether and where to set the PIC register.
   If true, the PIC register is reloaded in the current position of the
   instruction stream regardless of whether it was loaded before.  Otherwise,
   it is only loaded if not already done so (crtl->uses_pic_offset_table is
   null).  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
                  || (pic_reg != NULL_RTX
                      && REG_P (pic_reg)
                      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
          && !compute_now
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (pic_reg == NULL_RTX)
            pic_reg = gen_reg_rtx (Pmode);
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = pic_reg;

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL, pic_reg);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              if (currently_expanding_to_rtl)
                insert_insn_on_edge (seq,
                                     single_succ_edge
                                     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
              else
                emit_insn (seq);
            }
        }
    }
}
/* Generate insns to calculate the address of ORIG in pic mode.  */
static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by register
   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
   location in the instruction stream, regardless of whether it was loaded
   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.

   Returns the register REG into which the PIC load is performed.  */
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
                        bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (SYMBOL_REF_P (orig)
      || LABEL_REF_P (orig))
    {
      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
         may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((LABEL_REF_P (orig)
           || (SYMBOL_REF_P (orig)
               && SYMBOL_REF_LOCAL_P (orig)
               && (SYMBOL_REF_DECL (orig)
                   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
               && (!SYMBOL_REF_FUNCTION_P (orig)
                   || arm_fdpic_local_funcdesc_p (orig))))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register (pic_reg, compute_now);

          if (pic_reg == NULL_RTX)
            pic_reg = cfun->machine->pic_reg;

          insn = calculate_pic_address_constant (reg, pic_reg, orig);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
                                     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg, pic_reg,
                                       compute_now);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
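
/* A rough sketch (illustrative) of the common case under -fpic: a global
   SYMBOL_REF is rewritten into a GOT load of the form

     (set (reg Rt) (mem/u (plus (reg PIC) (unspec [sym] UNSPEC_PIC_SYM))))

   via calculate_pic_address_constant, with a REG_EQUAL note holding the
   original symbol so later passes can still CSE the address.  */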
/* Generate insns that produce the address of the stack canary.  */
static rtx
arm_stack_protect_tls_canary_mem (bool reload)
{
  rtx tp = gen_reg_rtx (SImode);
  if (reload)
    emit_insn (gen_reload_tp_hard (tp));
  else
    emit_insn (gen_load_tp_hard (tp));

  rtx reg = gen_reg_rtx (SImode);
  rtx offset = GEN_INT (arm_stack_protector_guard_offset);
  emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
  return gen_rtx_MEM (SImode, reg);
}
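
/* This implements the -mstack-protector-guard=tls flavour of stack
   protection: instead of loading the global __stack_chk_guard, the canary
   is read from a fixed offset (arm_stack_protector_guard_offset, set with
   -mstack-protector-guard-offset=) off the thread pointer.  */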
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
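
/* Roughly (illustrative), the non-VxWorks path above materializes the PIC
   base with a minipool load of "_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)"
   followed by a pc-relative add, e.g. in ARM state:

        ldr     rPIC, .Lminipool_entry
     .LPICn:
        add     rPIC, pc, rPIC          @ rPIC = &_GLOBAL_OFFSET_TABLE_

   The UNSPEC_PIC_LABEL wrapper keeps the label out of the insn stream
   until final assembly output.  */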
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (LABEL_REF_P (orig))
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
        {
          *is_readonly = true;
          return true;
        }
      if (SYMBOL_REF_LOCAL_P (orig)
          && !SYMBOL_REF_EXTERNAL_P (orig)
          && SYMBOL_REF_DECL (orig)
          && (!DECL_P (SYMBOL_REF_DECL (orig))
              || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
        {
          tree decl = SYMBOL_REF_DECL (orig);
          tree init = (TREE_CODE (decl) == VAR_DECL)
            ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
            ? decl : 0;
          int reloc = 0;
          bool named_section, readonly;

          if (init && init != error_mark_node)
            reloc = compute_reloc_for_constant (init);

          named_section = TREE_CODE (decl) == VAR_DECL
            && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
          readonly = decl_readonly_section (decl, reloc);

          /* We don't know where the link script will put a named
             section, so return false in such a case.  */
          if (named_section)
            return false;

          *is_readonly = readonly;
          return true;
        }

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
         pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
           && SYMBOL_REF_P (orig)
           && (SYMBOL_REF_FUNCTION_P (orig)
               || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
         PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                                   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
                                                   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static bool
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && !CONST_INT_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    return true;
  return false;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
    return mve_vector_mem_operand (mode, x, strict_p);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode
           || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
           || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
             If vldr is selected it uses arm_coproc_mem_operand.  */
          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
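
/* For instance (illustrative): the scaled-index form
   (plus (reg r1) (mult (reg r2) (const_int 4))) is accepted above via
   power_of_two_operand and is output as "ldr r0, [r1, r2, lsl #2]",
   alongside plain reg+imm offsets within the ranges computed here.  */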
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* Thumb-2 ldrd only has reg+const addressing modes.
             Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
             If vldr is selected it uses arm_coproc_mem_operand.  */
          if (TARGET_LDRD)
            return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
          else
            return IN_RANGE (val, -255, 4095 - 4);
        }

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && SYMBOL_REF_P (x)
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
           && !arm_disable_literal_pool)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (LABEL_REF_P (x)
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && SYMBOL_REF_P (x)
           && CONSTANT_POOL_ADDRESS_P (x)
           && !arm_disable_literal_pool
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
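
/* Concretely: byte accesses allow offsets 0-31, halfword accesses even
   offsets 0-62, and word (or larger) accesses 4-byte-aligned offsets
   with offset + size <= 128, matching the unsigned immediate fields of
   the 16-bit ldrb/ldrh/ldr encodings.  */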
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */
      rtx tmp;

      if (TARGET_FDPIC)
        {
          rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
          rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

          emit_insn (gen_load_tp_soft_fdpic ());

          emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
                                                          initial_fdpic_reg));
        }
      else
        emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (reloc)),
                            UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (reloc), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          gcc_assert (!TARGET_FDPIC);

          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          if (TARGET_FDPIC)
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
          else
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          gcc_assert (!TARGET_FDPIC);

          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          if (TARGET_FDPIC)
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
          else
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
        {
          sum = gen_rtx_UNSPEC (Pmode,
                                gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
                                UNSPEC_TLS);
          reg = load_tls_operand (sum, reg);
          emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
          emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
        }
      else
        {
          labelno = GEN_INT (pic_labelno++);
          label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
          label = gen_rtx_CONST (VOIDmode, label);
          sum = gen_rtx_UNSPEC (Pmode,
                                gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                           GEN_INT (TARGET_ARM ? 8 : 4)),
                                UNSPEC_TLS);
          reg = load_tls_operand (sum, reg);

          if (TARGET_ARM)
            emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
          else if (TARGET_THUMB2)
            emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
          else
            {
              emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
              emit_move_insn (reg, gen_const_mem (SImode, reg));
            }
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
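
/* User-level sketch (illustrative): the case chosen above comes from the
   TLS model attached to the SYMBOL_REF, e.g. for

     __thread int counter;
     int get (void) { return counter; }

   -fpic shared code defaults to global-dynamic through __tls_get_addr,
   while -ftls-model=initial-exec or local-exec selects the cheaper
   thread-pointer-relative sequences handled in the later cases.  */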
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (!SYMBOL_REF_P (x))
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (TARGET_THUMB1)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode || mode == DFmode)
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
         only use an 8-bit index.  So let's use a 12-bit index for
         SImode only and hope that arm_gen_constant will enable LDRB
         to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the
             base with more bits set and use a negative index instead.
             Don't do this for Thumb as negative offsets are much more
             limited.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
                                          false /*compute_now*/);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
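
/* Worked example (illustrative): for a word load at base + 4100, which no
   ARM addressing mode encodes directly, the reg+CONST_INT handling above
   splits the offset as n = 4096, low_n = 4, so that instead of forcing
   the whole offset into a register we get roughly

        add     r3, r1, #4096
        ldr     r0, [r3, #4]

   keeping the residual index within the 12-bit range of LDR.  */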
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
                                          false /*compute_now*/);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */
bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
        {
          /* ARM currently does not provide relocations to encode TLS variables
             into AArch32 instructions, only data, so there is no way to
             currently implement these if a literal pool is disabled.  */
          if (arm_disable_literal_pool)
            sorry ("accessing thread-local storage is not currently supported "
                   "with %<-mpure-code%> or %<-mslow-flash-data%>");

          return true;
        }

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
          /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
             we build the symbol address with upper/lower
             relocations.  */
          || (TARGET_THUMB1
              && !label_mentioned_p (x)
              && arm_valid_symbolic_address_p (x)
              && arm_disable_literal_pool)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
          && INTVAL (offset) != 0)
        return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
        return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256
              /* 16-bit constant.  */
              || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return arm_disable_literal_pool
            ? COSTS_N_INSNS (8)
            : COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
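
/* Note: COSTS_N_INSNS (N) expands to N * 4, i.e. costs are measured in
   quarter-insn units, so the bare integers returned above are deliberate
   sub-insn-granularity guesses inherited from the old cost model.  */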
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	  /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	  /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4 bytes long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return arm_disable_literal_pool
	    ? COSTS_N_INSNS (8)
	    : COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case E_QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p,
		 int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
      break;

    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
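/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed (two argument insns plus 18 for the call and
   result processing) and to COSTS_N_INSNS (4) when optimizing for
   size.  */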
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
  do									\
    {									\
      shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);		\
      if (shift_op != NULL						\
	  && arm_rtx_shift_left_p (XEXP (x, IDX)))			\
	{								\
	  if (shift_reg)						\
	    {								\
	      if (speed_p)						\
		*cost += extra_cost->alu.arith_shift_reg;		\
	      *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				 ASHIFT, 1, speed_p);			\
	    }								\
	  else if (speed_p)						\
	    *cost += extra_cost->alu.arith_shift;			\
									\
	  *cost += (rtx_cost (shift_op, GET_MODE (shift_op),		\
			      ASHIFT, 0, speed_p)			\
		    + rtx_cost (XEXP (x, 1 - IDX),			\
				GET_MODE (shift_op),			\
				OP, 1, speed_p));			\
	  return true;							\
	}								\
    }									\
  while (0)
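/* HANDLE_NARROW_SHIFT_ARITH is used for sub-word arithmetic such as
   (plus:HI (ashift:HI (reg) (const_int 2)) (reg)), where only the
   left-shift form can fold into the ARM shifter operand; IDX selects
   which side of the PLUS or MINUS is inspected for the shift.  */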
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}
/* Helper for arm_bfi_p.  */
static bool
arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
{
  unsigned HOST_WIDE_INT const1;
  unsigned HOST_WIDE_INT const2 = 0;

  if (!CONST_INT_P (XEXP (op0, 1)))
    return false;

  const1 = UINTVAL (XEXP (op0, 1));
  if (!CONST_INT_P (XEXP (op1, 1))
      || ~UINTVAL (XEXP (op1, 1)) != const1)
    return false;

  if (GET_CODE (XEXP (op0, 0)) == ASHIFT
      && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
    {
      const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
      *sub0 = XEXP (XEXP (op0, 0), 0);
    }
  else
    *sub0 = XEXP (op0, 0);

  if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return false;

  *sub1 = XEXP (op1, 0);
  return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
}
/* Recognize a BFI idiom.  Helper for arm_rtx_costs_internal.  The
   format looks something like:

   (IOR (AND (reg1) (~const1))
	(AND (ASHIFT (reg2) (const2))
	     (const1)))

   where const1 is a consecutive sequence of 1-bits with the
   least-significant non-zero bit starting at bit position const2.  If
   const2 is zero, then the shift will not appear at all, due to
   canonicalization.  The two arms of the IOR expression may be
   swapped.  */
static bool
arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
{
  if (GET_CODE (x) != IOR)
    return false;

  if (GET_CODE (XEXP (x, 0)) != AND
      || GET_CODE (XEXP (x, 1)) != AND)
    return false;

  return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
	  || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
}
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode, so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
				   && GET_MODE_SIZE (mode) > 4)
				  ? 2 : 1));
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;

	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;

	  goto const_int_cost;
	}

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);
    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				   + ldm_regs_per_insn_subsequent - 1)
				  / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */

      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
				? extra_cost->ldst.ldm_regs_per_insn_1st
				: extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			   ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			   : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	    }
	}
      return true;
    }
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */

    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs	r1, r0, #0
	 and	r0, r0, #(n - 1)
	 and	r1, r1, #(n - 1)
	 rsbpl	r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
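    /* For example, a signed SImode "x % 8" matches the power-of-2 path
       above (exact_log2 (8) == 3 > 0) and is costed as the short
       AND/AND/RSBPL expansion with n - 1 == 7 as the mask, rather than
       as a division libcall.  */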
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through.  */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
	    *cost += 1;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;
    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;
	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Factor out any borrow operation.  There's more than one way
	     of expressing this; try to recognize them all.  */
	  if (GET_CODE (op0) == MINUS)
	    {
	      if (arm_borrow_operation (op1, SImode))
		{
		  op1 = XEXP (op0, 1);
		  op0 = XEXP (op0, 0);
		}
	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
		op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_borrow_operation (XEXP (op1, 0), SImode))
	    op1 = XEXP (op1, 0);
	  else if (GET_CODE (op0) == NEG
		   && arm_borrow_operation (op1, SImode))
	    {
	      /* Negate with carry-in.  For Thumb2 this is done with
		 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
		 RSC instruction that exists in Arm mode.  */
	      if (speed_p)
		*cost += (TARGET_THUMB2
			  ? extra_cost->alu.arith_shift
			  : extra_cost->alu.arith);
	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
	      return true;
	    }
	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
	     Note we do mean ~borrow here.  */
	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
	    {
	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
	      return true;
	    }

	  shift_op = shifter_op_p (op0, &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (op1, &shift_by_reg);
	      non_shift_op = op0;
	    }
	  else
	    non_shift_op = op1;

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (op0))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (op0), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse as we don't want to cost any borrow that
	     we've stripped.  */
	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	  return true;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}
      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Handle a side effect of adding in the carry to an addition.  */
	  if (GET_CODE (op0) == PLUS
	      && arm_carry_operation (op1, mode))
	    {
	      op1 = XEXP (op0, 1);
	      op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_carry_operation (op0, mode))
	    {
	      op0 = XEXP (op1, 0);
	      op1 = XEXP (op1, 1);
	    }
	  else if (GET_CODE (op0) == PLUS)
	    {
	      op0 = strip_carry_operation (op0);
	      if (swap_commutative_operands_p (op0, op1))
		std::swap (op0, op1);
	    }

	  if (arm_carry_operation (op0, mode))
	    {
	      /* Adding the carry to a register is a canonicalization of
		 adding 0 to the register plus the carry.  */
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (op0) == MULT)
	    {
	      rtx mul_op = op0;

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (op1))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (op1), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse here because we want to test the operands
	     without any carry operation.  */
	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	  return true;
	}
      if (mode == DImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      else if (mode == SImode && arm_arch_thumb2
	       && arm_bfi_p (x, &sub0, &sub1))
	{
	  *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
	  *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.bfi;

	  return true;
	}

      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
    case EQ:
    case GE:
    case GT:
    case LE:
    case LT:
      /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
	 vcle and vclt).  */
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && (XEXP (x, 1) == CONST0_RTX (mode)))
	{
	  *cost = 0;
	  return true;
	}

      /* Fall through.  */
    case NE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

    const_int_cost:
      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	  /* Extra costs?  */
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	  /* Extra costs?  */
	}

      return true;
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      /* Fixme.  */
      if (((TARGET_NEON && TARGET_HARD_FLOAT
	    && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
	   || TARGET_HAVE_MVE)
	  && simd_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);
    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0),
						   1)) > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_VFP5)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }

    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  /* Don't cost a simple reg-reg move at a full insn cost: such moves
     will likely disappear during register allocation.  */
  if (!reload_completed
      && GET_CODE (PATTERN (insn)) == SET
      && REG_P (SET_DEST (PATTERN (insn)))
      && REG_P (SET_SRC (PATTERN (insn))))
    return 0;
  cost = pattern_cost (PATTERN (insn), speed);
  /* If the cost is zero, then it's likely a complex insn.  We don't want the
     cost of these to be less than something we know about.  */
  return cost ? cost : COSTS_N_INSNS (2);
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int *cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
	      || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int *cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		     (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int *cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 0;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   such a move is typically more expensive than a single memory access.
   We set the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
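/* Worked example: for a vec_construct of a V4SImode vector,
   TYPE_VECTOR_SUBPARTS is 4, so the cost returned above is 4 / 2 + 1 = 3.  */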
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_ADC_IMM:
    case TYPE_ADCS_IMM:
    case TYPE_BFM:
    case TYPE_REV:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD_4:
    case TYPE_STORE_4:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FMOV:
    case TYPE_F_CVT:
    case TYPE_FCMPS:
    case TYPE_FCMPD:
    case TYPE_FCONSTS:
    case TYPE_FCONSTD:
    case TYPE_FMULS:
    case TYPE_FMACS:
    case TYPE_FMULD:
    case TYPE_FMACD:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_F_MRC:
    case TYPE_F_MRRC:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_EXTEND:
    case TYPE_MVN_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
    case TYPE_BRANCH:
    case TYPE_CALL:
      return true;

    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock,
	     *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID(ready [first_older_only]),
	     INSN_UID(ready [first_younger]));
  rtx_insn *first_older_only_insn = ready [first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i+1];
    }

  ready[i] = first_older_only_insn;
}
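/* Illustration of the reordering above (the ready list issues from the
   highest index downwards): if the list holds, in issue order, a younger
   insn Y followed later by an older-only insn O, the loop shifts the
   intervening entries down and reinserts O immediately ahead of Y, so the
   pair can dual-issue as (older, younger) in a later cycle.  */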
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((SYMBOL_REF_P (src_mem)
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;
}
static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using this
   formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
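/* Worked example of the encoding above: 1.0 = 16 * 2^-4, so s = 0, n = 16
   and r = 4.  That gives A = 0, BCD = 4 XOR 3 = 7 (binary 111) and
   EFGH = 16 - 16 = 0, i.e. the 8-bit pattern 0111 0000 (0x70) -- the imm8
   used for "vmov.f32 sN, #1.0".  */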
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.cc), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
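/* Tracing the code above for 0.5 (= 16 * 2^-5): GCC normalizes significands
   to [0.5, 1), so REAL_EXP returns 0 and exponent becomes 5 - 0 = 5; the
   mantissa reduces to 16.  The returned index is
   (0 << 7) | ((5 ^ 3) << 4) | (16 - 16) = 0x60.  */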
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon and MVE instructions.
   Legal immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
		      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
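/* For example, a V4SImode vector with every element 0x000000ab splats to
   the byte pattern { ab 00 00 00 ... } and matches variant 0, while the
   all-elements 0xffffffab pattern (bytes ab ff ff ff) matches variant 6,
   the VMVN form.  */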
static int
simd_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Only support 128-bit vectors for MVE.  */
  if (TARGET_HAVE_MVE
      && (!vector
	  || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
	  || n_elts * innersize != 16))
    return -1;

  if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
    return -1;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
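/* Example classification: a V8HImode vector with all elements 0x00ab
   expands to the byte sequence ab 00 ab 00 ...; it fails the stride-4 i32
   checks (bytes[2] is not zero) but satisfies CHECK (2, 16, 4), so the
   function returns immtype 4 with *ELEMENTWIDTH = 16, i.e. a vmov.i16
   immediate.  */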
/* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
   implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
   (or zero for float elements), and a modified constant (whatever should be
   output for a VMOV) in *MODCONST.  "neon_immediate_valid_for_move" function is
   modified to "simd_immediate_valid_for_move" as this function will be used
   both by neon and mve.  */
int
simd_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See simd_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts,
   because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
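/* For example, with a V8HImode shift count the element size is 16, so a
   vector of all-15s is accepted for VSHL (legal range 0..15) and a vector
   of all-16s only for VSHR (legal range 1..16); a zero count is rejected
   for right shifts.  */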
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
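/* For example, reducing a V4SFmode vector (parts == 4) takes two rounds:
   the first (i == 2) combines into a fresh scratch register, the second
   (i == 1) writes the final pairwise result into OP0.  */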
/* Return a non-NULL RTX iff VALS is a vector constant that can be
   loaded into a register using VDUP.

   If this is the case, and GENERATE is set, we also generate
   instructions to do this and return an RTX to assign to the register.  */

static rtx
neon_vdup_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  if (!generate)
    return x;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}
/* Return a HI representation of CONST_VEC suitable for MVE predicates.  */
rtx
mve_bool_vec_to_const (rtx const_vec)
{
  int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
  int repeat = 16 / n_elts;
  int i;
  int hi_val = 0;

  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (const_vec, i);
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (int j = 0; j < repeat; j++)
	hi_val |= elpart << (i * repeat + j);
    }
  return gen_int_mode (hi_val, HImode);
}
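/* Worked example: for a V4BImode predicate {1, 0, 1, 1}, repeat is
   16 / 4 = 4, so each element controls one nibble of the HImode value;
   bits 0-3, 8-11 and 12-15 are set, giving 0xff0f.  */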
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
   into a register.

   If this is the case, and GENERATE is set, we also generate code to do
   this and return an RTX to copy into the register.  */

rtx
neon_make_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
    return mve_bool_vec_to_const (const_vec);
  else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return arm_disable_literal_pool ? NULL_RTX : const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   an error mentioning DESC if it doesn't.  EXP indicates the source location,
   which includes the inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error_at (EXPR_LOCATION (exp),
		  "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}
/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB level is 2 if full writeback address modes are allowed, 1
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed and 0 if no writeback at all is supported.  */

static int
arm_coproc_mem_operand_wb (rtx op, int wb_level)
{
  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need full writeback to accept
     PRE_INC and POST_DEC, and at least restricted writeback for
     POST_INC and PRE_DEC.  */
  if (wb_level > 0
      && (GET_CODE (ind) == POST_INC
	  || GET_CODE (ind) == PRE_DEC
	  || (wb_level > 1
	      && (GET_CODE (ind) == PRE_INC
		  || GET_CODE (ind) == POST_DEC))))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb_level > 1
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).

     The encoded immediate for 16-bit modes is multiplied by 2,
     while the encoded immediate for 32-bit and 64-bit modes is
     multiplied by 4.  */
  int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
      && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
    return TRUE;

  return FALSE;
}
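/* For example, an SFmode or DFmode access uses FACTOR == 4 above, so the
   legal immediate offsets are multiples of 4 in [-1020, 1020]; an HFmode
   access uses FACTOR == 2, giving multiples of 2 in [-510, 510].  */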
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int arm_coproc_mem_operand (rtx op, bool wb)
{
  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
}

/* Return TRUE if OP is a valid coprocessor memory address pattern in a
   context in which no writeback address modes are allowed.  */

int
arm_coproc_mem_operand_no_writeback (rtx op)
{
  return arm_coproc_mem_operand_wb (op, 0);
}
/* This function returns TRUE on matching mode and op.
   1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
   2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13).  */
int
mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
{
  enum rtx_code code;
  int val, reg_no;

  /* Match: (mem (reg)).  */
  if (REG_P (op))
    {
      int reg_no = REGNO (op);
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : reg_no < LAST_ARM_REGNUM)
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  code = GET_CODE (op);

  if (code == POST_INC || code == PRE_DEC
      || code == PRE_INC || code == POST_DEC)
    {
      reg_no = REGNO (XEXP (op, 0));
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  else if (((code == POST_MODIFY || code == PRE_MODIFY)
	    && GET_CODE (XEXP (op, 1)) == PLUS
	    && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
	    && REG_P (XEXP (op, 0))
	    && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
	   /* Make sure to only accept PLUS after reload_completed, otherwise
	      this will interfere with auto_inc's pattern detection.  */
	   || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
	       && GET_CODE (XEXP (op, 1)) == CONST_INT))
    {
      reg_no = REGNO (XEXP (op, 0));
      if (code == PLUS)
	val = INTVAL (XEXP (op, 1));
      else
	val = INTVAL (XEXP(XEXP (op, 1), 1));

      switch (mode)
	{
	  case E_V16QImode:
	  case E_V8QImode:
	  case E_V4QImode:
	    if (abs (val) > 127)
	      return FALSE;
	    break;
	  case E_V8HImode:
	  case E_V8HFmode:
	  case E_V4HImode:
	  case E_V4HFmode:
	    if (val % 2 != 0 || abs (val) > 254)
	      return FALSE;
	    break;
	  case E_V4SImode:
	  case E_V4SFmode:
	    if (val % 4 != 0 || abs (val) > 508)
	      return FALSE;
	    break;
	  default:
	    return FALSE;
	}
      return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
	      || (MVE_STN_LDW_MODE (mode)
		  ? reg_no <= LAST_LO_REGNUM
		  : (reg_no < LAST_ARM_REGNUM
		     && (code == PLUS || reg_no != SP_REGNUM))));
    }
  return FALSE;
}
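/* For example, with a V4SImode operand the offset in a POST_MODIFY or
   (post-reload) PLUS address must be a multiple of 4 of magnitude at most
   508, so [Rn, #504] is accepted while [Rn, #502] and [Rn, #512] are both
   rejected.  */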
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1))
      && REG_P (XEXP (ind, 0))
      && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
     return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
   type.  */
int
mve_struct_mem_operand (rtx op)
{
  rtx ind = XEXP (op, 0);

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow only post-increment by the mode size.  */
  if (GET_CODE (ind) == POST_INC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */

void
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
}
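/* For example, with OPERANDS[3] in d1 (mode V4SF) and lane 1: the per-D
   lane count is 4 / 4 == 1, so lane 1 overflows into the next D register;
   OPERANDS[3] becomes the index of d2 and OPERANDS[4] becomes 0.  */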
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (SYMBOL_REF_P (x))
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (LABEL_REF_P (x))
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
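/* For example, bounds [0, 255] match with *MASK == 8 and *SIGNED_SAT false
   (usat #8), while bounds [-128, 127] match with *MASK == 8 and
   *SIGNED_SAT true (ssat #8); exact_log2 (127 + 1) == 7 in both cases.  */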
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
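/* For example, (mem (plus (reg r4) (const_int 4))) and
   (mem (plus (reg r4) (const_int 8))) are adjacent (val_diff == 4),
   provided r4 is not an eliminable register.  */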
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.  It
	     guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by write-back
	 or by explicit load.  If the pattern does not describe an update,
	 then the address register must be in the list of loaded registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
   or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
   following form:

   [(set (reg:SI <N>) (const_int 0))
    (set (reg:SI <M>) (const_int 0))
    ...
    (unspec_volatile [(const_int 0)]
		     VUNSPEC_CLRM_APSR)
    (clobber (reg:CC CC_REGNUM))
   ]

   Any number (including 0) of set expressions is valid, the volatile unspec is
   optional.  All registers but SP and PC are allowed and registers must be in
   strict increasing order.

   To be a valid VSCCLRM pattern, OP must have the following form:

   [(unspec_volatile [(const_int 0)]
		     VUNSPEC_VSCCLRM_VPR)
    (set (reg:SF <N>) (const_int 0))
    (set (reg:SF <M>) (const_int 0))
    ...
   ]

   As with CLRM, any number (including 0) of set expressions is valid, however
   the volatile unspec is mandatory here.  Any VFP single-precision register is
   accepted but all registers must be consecutive and in increasing order.  */

bool
clear_operation_p (rtx op, bool vfp)
{
  unsigned regno;
  unsigned last_regno = INVALID_REGNUM;
  rtx elt, reg, zero;
  int count = XVECLEN (op, 0);
  int first_set = vfp ? 1 : 0;
  machine_mode expected_mode = vfp ? E_SFmode : E_SImode;

  for (int i = first_set; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);

      if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
	{
	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
	      || XVECLEN (elt, 0) != 1
	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
	      || i != count - 2)
	    return false;

	  continue;
	}

      if (GET_CODE (elt) == CLOBBER)
	continue;

      if (GET_CODE (elt) != SET)
	return false;

      reg = SET_DEST (elt);
      zero = SET_SRC (elt);

      if (!REG_P (reg)
	  || GET_MODE (reg) != expected_mode
	  || zero != CONST0_RTX (SImode))
	return false;

      regno = REGNO (reg);

      if (vfp)
	{
	  if (i != first_set && regno != last_regno + 1)
	    return false;
	}
      else
	{
	  if (regno == SP_REGNUM || regno == PC_REGNUM)
	    return false;
	  if (i != first_set && regno <= last_regno)
	    return false;
	}

      last_regno = regno;
    }

  return true;
}
14403 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14404 instruction. ADD_OFFSET is nonzero if the base address register needs
14405 to be modified with an add instruction before we can use it. */
14408 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
14409 int nops
, HOST_WIDE_INT add_offset
)
14411 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14412 if the offset isn't small enough. The reason 2 ldrs are faster
14413 is because these ARMs are able to do more than one cache access
14414 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14415 whilst the ARM8 has a double bandwidth cache. This means that
14416 these cores can do both an instruction fetch and a data fetch in
14417 a single cycle, so the trick of calculating the address into a
14418 scratch register (one of the result regs) and then doing a load
14419 multiple actually becomes slower (and no smaller in code size).
14420 That is the transformation
14422 ldr rd1, [rbase + offset]
14423 ldr rd2, [rbase + offset + 4]
14427 add rd1, rbase, offset
14428 ldmia rd1, {rd1, rd2}
14430 produces worse code -- '3 cycles + any stalls on rd2' instead of
14431 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14432 access per cycle, the first sequence could never complete in less
14433 than 6 cycles, whereas the ldm sequence would only take 5 and
14434 would make better use of sequential accesses if not hitting the
14437 We cheat here and test 'arm_ld_sched' which we currently know to
14438 only be true for the ARM8, ARM9 and StrongARM. If this ever
14439 changes, then the test below needs to be reworked. */
14440 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
14443 /* XScale has load-store double instructions, but they have stricter
14444 alignment requirements than load-store multiple, so we cannot
14447 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14448 the pipeline until completion.
14456 An ldr instruction takes 1-3 cycles, but does not block the
14465 Best case ldr will always win. However, the more ldr instructions
14466 we issue, the less likely we are to be able to schedule them well.
14467 Using ldr instructions also increases code size.
14469 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14470 for counts of 3 or 4 regs. */
14471 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
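/* For example, with offsets {8, 4, 12, 0} and ORDER[0] preset to 3 (the
   index of the lowest offset), the loop fills ORDER as {3, 1, 0, 2}.
   Offsets such as {0, 8} fail, since no offset is exactly 4 above the
   previous one.  */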
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of loads.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */

static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
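/* As an illustration (registers and offsets invented): given the pair

	ldr	r4, [r0, #4]
	ldr	r5, [r0, #8]

   the offsets {4, 8} are adjacent and the lowest is 4, so on ARM this
   yields ldm_case 2 (ldmib) with *BASE set to r0 and *LOAD_OFFSET to 4.  */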
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */

static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
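/* Sketch of the RTL this builds (illustrative, with invented operands):
   for COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8 the result is

	(parallel [(set (reg:SI rb) (plus:SI (reg:SI rb) (const_int 8)))
		   (set (reg:SI 4) (mem:SI ...))
		   (set (reg:SI 5) (mem:SI ...))])

   which is the shape the ldm patterns in the machine description match.  */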
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
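/* A hypothetical call (for illustration; not from the surrounding code):
   to load r4-r7 from four consecutive words starting at MEM, updating the
   base register afterwards:

	int regs[4] = {4, 5, 6, 7};
	HOST_WIDE_INT off = 0;
	emit_insn (arm_gen_load_multiple (regs, 4, base, TRUE, mem, &off));

   On return OFF has been advanced by 16, since WRITE_BACK was true.  */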
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
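/* A made-up example of the peephole this implements: the matcher hands us

	ldr	r5, [r2, #4]
	ldr	r4, [r2]

   With SORT_REGS true the register list is sorted to {r4, r5}, mem_order
   puts the offset-0 access first, and the pair is replaced by

	ldmia	r2, {r4, r5}  */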
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
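/* A made-up example of the transformation this enables: the sequence

	mov	r4, #0
	mov	r5, #1
	str	r4, [r0]
	str	r5, [r0, #4]

   is re-emitted as the two constant loads followed by a single

	stmia	r0, {r4, r5}  */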
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = REGNO (regs[i]);

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
					    gen_lowpart (HImode,
							 halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
					gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover,
				       interleave_factor);
}
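/* Shape of the emitted code, for a hypothetical LENGTH == 40 and
   BYTES_PER_ITER == 16: the loop body copies 16 bytes and runs twice
   (stopping when SRC_REG reaches FINAL_SRC), then the straight-line
   call above mops up the remaining LEFTOVER == 8 bytes.  */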
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_cpymemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 redundant.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_cpymemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_cpymemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */

static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
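/* For example (values for illustration only): for an SImode MEM at [r0]
   this yields an SImode MEM at [r0, #4]; for a DImode MEM, one at
   [r0, #8].  */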
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */

bool
gen_cpymem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_cpymemqi (operands);

  /* If either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx first_reg = NULL_RTX;
      rtx second_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  if (BYTES_BIG_ENDIAN)
	    {
	      second_reg = gen_lowpart (SImode, reg0);
	      first_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	  else
	    {
	      first_reg = gen_lowpart (SImode, reg0);
	      second_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	}
      if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
	emit_move_insn (reg0, src);
      else if (src_aligned)
	emit_insn (gen_unaligned_loaddi (reg0, src));
      else
	{
	  emit_insn (gen_unaligned_loadsi (first_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (second_reg, src));
	}

      if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
	emit_move_insn (dst, reg0);
      else if (dst_aligned)
	emit_insn (gen_unaligned_storedi (dst, reg0));
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, first_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, second_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  gcc_assert (len < 4);

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  gcc_assert (len < 2);
  if (len)
    {
      dst = adjust_address (dst, QImode, 0);
      src = adjust_address (src, QImode, 0);
      reg0 = gen_reg_rtx (QImode);
      emit_move_insn (reg0, src);
      emit_move_insn (dst, reg0);
    }

  return true;
}
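/* For a hypothetical 10-byte copy with both ends word-aligned: the main
   loop emits one ldrd/strd pair for bytes 0-7, and the tail code then
   copies the final halfword.  */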
/* Decompose operands for a 64-bit binary operation in OP1 and OP2
   into its component 32-bit subregs.  OP2 may be an immediate
   constant and we want to simplify it in that case.  */

void
arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
			rtx *lo_op2, rtx *hi_op2)
{
  *lo_op1 = gen_lowpart (SImode, op1);
  *hi_op1 = gen_highpart (SImode, op1);
  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_lowpart_offset (SImode, DImode));
  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_highpart_offset (SImode, DImode));
}
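/* For example (values for illustration): on a little-endian target,
   decomposing OP2 == (const_int 0x100000003) gives *LO_OP2 ==
   (const_int 3) and *HI_OP2 == (const_int 1).  */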
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT: return CC_DLTmode;
	case LE: return CC_DLEmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT: return CC_DGTmode;
	case GE: return CC_DGEmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU: return CC_DLTUmode;
	case LEU: return CC_DLEUmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU: return CC_DGTUmode;
	case GEU: return CC_DGEUmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
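/* A worked example (operands invented): for
   (ne (ior (lt (reg) (const_int 0)) (ne (reg) (const_int 0))) (const_int 0))
   we have COND_OR == DOM_CC_X_OR_Y with cond1 == LT and cond2 == NE; LT
   implies NE, so the dominating mode chosen is CC_DNEmode.  */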
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || SUBREG_P (y))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* A widened compare of the sum of a value plus a carry against a
     constant.  This is a representation of RSC.  We want to swap the
     result of the comparison at output.  Not valid if the Z bit is
     needed.  */
  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && arm_borrow_operation (XEXP (x, 1), DImode)
      && CONST_INT_P (y)
      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	   && (op == LE || op == GT))
	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      && (op == LEU || op == GTU))))
    return CC_SWPmode;

  /* If X is a constant we want to use CC_RSBmode.  This is
     non-canonical, but arm_gen_compare_reg uses this to generate the
     correct canonical form.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || SUBREG_P (y))
      && CONST_INT_P (x))
    return CC_RSBmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || SUBREG_P (y))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NZmode;

  /* A comparison of ~reg with a const is really a special
     canonicalization of compare (~const, reg), which is a reverse
     subtract operation.  We may not get here if CONST is 0, but that
     doesn't matter because ~0 isn't a valid immediate for RSB.  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == NOT
      && CONST_INT_P (y))
    return CC_RSBmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
      && CONST_INT_P (y)
      && UINTVAL (y) == 0x800000000
      && (op == GEU || op == LTU))
    return CC_ADCmode;

  if (GET_MODE (x) == DImode
      && (op == GE || op == LT)
      && GET_CODE (x) == SIGN_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_NVmode;

  if (GET_MODE (x) == DImode
      && (op == GEU || op == LTU)
      && GET_CODE (x) == ZERO_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_Bmode;

  if (GET_MODE (x) == DImode
      && (op == EQ || op == NE)
      && (GET_CODE (x) == PLUS
	  || GET_CODE (x) == MINUS)
      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
      && GET_CODE (y) == SIGN_EXTEND
      && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
    return CC_Vmode;

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
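/* Example (illustrative): comparing (ashift (reg) (const_int 2)) against
   a register selects CC_SWPmode above, since the instruction must be
   emitted with its operands swapped: "cmp rN, rM, lsl #2".  */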
/* X and Y are two (DImode) things to compare for the condition CODE.  Emit
   the sequence of instructions needed to generate a suitable condition
   code register.  Return the CC register result.  */

static rtx
arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;

  /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
  gcc_assert (TARGET_32BIT);
  gcc_assert (!CONST_INT_P (x));

  rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
				  subreg_highpart_offset (SImode, DImode));
  rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
				  subreg_highpart_offset (SImode, DImode));

  switch (code)
    {
    case EQ:
    case NE:
      {
	if (y_lo == const0_rtx || y_hi == const0_rtx)
	  {
	    if (y_lo != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		gcc_assert (y_hi == const0_rtx);
		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
		if (!arm_add_operand (y_lo, SImode))
		  y_lo = force_reg (SImode, y_lo);
		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
		x_lo = scratch2;
	      }
	    else if (y_hi != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
		if (!arm_add_operand (y_hi, SImode))
		  y_hi = force_reg (SImode, y_hi);
		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
		x_hi = scratch2;
	      }

	    if (!scratch)
	      {
		gcc_assert (!reload_completed);
		scratch = gen_rtx_SCRATCH (SImode);
	      }

	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

	    rtx set
	      = gen_rtx_SET (cc_reg,
			     gen_rtx_COMPARE (CC_NZmode,
					      gen_rtx_IOR (SImode, x_lo, x_hi),
					      const0_rtx));
	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
							      clobber)));
	    return cc_reg;
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	if (!arm_add_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
	cc_reg = gen_rtx_REG (mode, CC_REGNUM);
	emit_insn (gen_rtx_SET (cc_reg,
				gen_rtx_COMPARE (mode, conjunction,
						 const0_rtx)));
	return cc_reg;
      }

    case LT:
    case GE:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);

	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
							   cmp1));
	else if (CONST_INT_P (y_hi))
	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
							     y_hi, cmp1));
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
							 cmp1));
	return SET_DEST (single_set (insn));
      }

    case LE:
    case GT:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
								 x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    case LTU:
    case GEU:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);
	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
							  cmp1));
	else if (CONST_INT_P (y_hi))
	  {
	    /* Constant is viewed as unsigned when zero-extended.  */
	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
							      y_hi, cmp1));
	  }
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
							cmp1));
	return SET_DEST (single_set (insn));
      }

    case LEU:
    case GTU:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
								x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    default:
      gcc_unreachable ();
    }
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */

rtx
arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    return arm_gen_dicompare_reg (code, x, y, scratch);

  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (mode == CC_RSBmode)
    {
      if (!scratch)
	scratch = gen_rtx_SCRATCH (SImode);
      emit_insn (gen_rsb_imm_compare_scratch (scratch,
					      GEN_INT (~UINTVAL (x)), y));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */

rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
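/* A worked example of the hi/lo split above (illustrative values, not from
   the original source): for offset = 0x2345, lo = 0x2345 & 0xfff = 0x345 and
   hi = 0x2345 - 0x345 = 0x2000, so hi + lo == offset holds.  The hi part is
   folded into the base register with addsi3 and the lo part becomes the
   immediate of the two byte loads (offset and offset + 1), both of which
   stay within the +/-4095 byte-load range.  */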
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
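/* Sketch of the sequence synthesized above on a little-endian target
   (illustrative mnemonics only; the actual insns are the movqi/lshrsi3
   patterns emitted by the code):

	strb	outval, [base, #offset]		@ low byte
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]	@ high byte

   On a big-endian target the two bytes are written in the opposite
   order.  */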
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */
static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
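/* For example (values follow directly from the limits above): in Thumb-2,
   offsets -1020, 0, 512 and 1020 are accepted, while 514 (not a multiple
   of 4) and 1024 (out of range) are rejected; in ARM state any offset in
   [-255, 255] is accepted, including odd ones.  */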
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the valid range, and that the gap
   between the offsets is 4.  If reload is complete, check that the registers
   are legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)	/* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
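/* For instance, following the ARM-state checks above:
     ldrd r0, r1, [r2]	 is valid (even first register, t2 == t + 1)
     ldrd r1, r2, [r3]	 is rejected (first destination register is odd)
     ldrd r0, r1, [r0]!	 is rejected (write-back base overlaps a
			 destination register).  */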
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}
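/* In other words: with unaligned accesses enabled, word alignment plus a
   word-aligned offset is enough (e.g. align = 32 bits, offset = 4); without
   them, full doubleword alignment is required (align >= 64 bits and the
   offset a multiple of 8).  */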
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (SUBREG_P (mem))
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
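/* The two address shapes accepted above are thus a plain base register,
   as in (mem (reg rN)), and base plus constant offset, as in
   (mem (plus (reg rN) (const_int 8))); anything with side effects
   (auto-increment forms) or a register index is rejected.  */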
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is a load or a store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free?  */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
/* Return true if parallel execution of the two word-size accesses provided
   could be satisfied with a single LDRD/STRD instruction.  Two word-size
   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
   register operands and OPERANDS[2,3] are the corresponding memory operands.
   */
bool
valid_operands_ldrd_strd (rtx *operands, bool load)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset;
  int i, gap;

  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	return false;
    }

  if (offsets[0] > offsets[1])
    return false;

  gap = offsets[1] - offsets[0];
  offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				false, load);
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      {
	char fpstr[20];
	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			 sizeof (fpstr), 0, 1);
	fprintf (f, "%s", fpstr);
      }
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this (each constant load branches
   around its own pool entry):

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT	    address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT	    forwards;
  HOST_WIDE_INT	    backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;
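/* For instance, MINIPOOL_FIX_SIZE yields 4 for QImode, HImode and SImode
   entries (sub-word values are padded out to a word) and 8 for DImode or
   DFmode entries, which is what triggers the 8-byte alignment handling
   in the code below.  */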
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
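/* Worked example of the size computation above: a TBB table with 5 byte
   entries occupies 5 bytes and is rounded up to 6 ((5 + 1) & ~1); a TBH
   table with 5 halfword entries needs exactly 10 bytes; an ADDR_VEC of 5
   SImode entries needs 20 bytes plus 2 alignment bytes on Thumb.  */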
/* Emit insns to load the function address from FUNCDESC (an FDPIC
   function descriptor) into a register and the GOT address into the
   FDPIC register, returning an rtx for the register holding the
   function address.  */
rtx
arm_load_function_descriptor (rtx funcdesc)
{
  rtx fnaddr_reg = gen_reg_rtx (Pmode);
  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

  emit_move_insn (fnaddr_reg, fnaddr);

  /* The ABI requires the entry point address to be loaded first, but
     since we cannot support lazy binding for lack of atomic load of
     two 32-bits values, we do not need to bother to prevent the
     previous load from being moved after that of the GOT address.  */
  emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));

  return fnaddr_reg;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label).levels[0].log;
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
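/* For example, if LABEL is aligned to 8 bytes (levels[0].log == 3), up to
   8 - 2 = 6 bytes of padding can precede it on Thumb, or 8 - 4 = 4 bytes
   in ARM state, since the instruction before the label is at least one
   minimum-size insn long.  */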
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
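/* Note how the refcount mechanism above deduplicates the pool: if two insns
   within range both need, say, the same 32-bit constant, the second call
   finds the existing entry, bumps mp->refcount and merely tightens its
   max_address via move_minipool_fix_forward_ref, so the value is emitted
   only once.  */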
/* Move a minipool fix MP from its current location to after MIN_MP.
   If MIN_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT  min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
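/* Worked example (illustrative constant): for the DImode value
   0x0000000100000001 each 32-bit half is 1, a single valid immediate, so
   the cost is 1 + 1 = 2 insns.  That is below the limit returned by
   arm_max_const_double_inline_cost, so the constant is synthesized inline
   rather than loaded from the constant pool.  */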
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 field's size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
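/* A worked example (hypothetical ILP32/AAPCS layout, not from the original
   source): for  struct { uint8_t a; uint16_t b; }  passed in r0, field 'a'
   covers bits 0-7 and 'b' is aligned to bit 16, so the gap between fields
   gives mask = (0xffffffff >> 16) - ((1 << 8) - 1) = 0xff00, and
   padding_bits_to_clear[0] accumulates 0x0000ff00, i.e. only the padding
   byte between the two fields must be cleared.  */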
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)

{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
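/* For example, a 'double' argument passed in VFP registers under the
   hard-float ABI arrives as a DFmode reg; ARM_NUM_REGS gives 2, so the
   mask keeps both single-precision registers backing it live.  A plain
   'int' in r0 only sets the bit for R0_REGNUM and leaves every padding
   mask at zero.  */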
/* Clear registers that may hold secrets before doing a cmse_nonsecure_call
   or returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP
   indicates which registers are to be fully cleared, using the value in
   register CLEARING_REG if more efficient.  The PADDING_BITS_LEN entries
   array PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in
   caller-saved core registers, with SCRATCH_REG used as a scratch register
   for that clearing.

   NOTE: one of the three following conditions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
     in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */
static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
  bool saved_clearing = false;
  rtx saved_clearing_reg = NULL_RTX;
  int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;

  gcc_assert (arm_arch_cmse);

  if (!bitmap_empty_p (to_clear_bitmap))
    {
      minregno = bitmap_first_set_bit (to_clear_bitmap);
      maxregno = bitmap_last_set_bit (to_clear_bitmap);
    }
  clearing_regno = REGNO (clearing_reg);

  /* Clear padding bits.  */
  gcc_assert (padding_bits_len <= NUM_ARG_REGS);
  for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
    {
      uint32_t mask;
      rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);

      if (padding_bits_to_clear[i] == 0)
	continue;

      /* If this is a Thumb-1 target and SCRATCH_REG is not a low register,
	 use CLEARING_REG as scratch.  */
      if (TARGET_THUMB1
	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
	{
	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
	     such that we can use clearing_reg to clear the unused bits in the
	     arguments.  */
	  if ((clearing_regno > maxregno
	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
	      && !saved_clearing)
	    {
	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
	      emit_move_insn (scratch_reg, clearing_reg);
	      saved_clearing = true;
	      saved_clearing_reg = scratch_reg;
	    }
	  scratch_reg = clearing_reg;
	}

      /* Fill the lower half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) & 0xFFFF;
      emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));

      /* Fill the top half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) >> 16;
      rtx16 = gen_int_mode (16, SImode);
      dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
      emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));

      emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
    }
  if (saved_clearing)
    emit_move_insn (clearing_reg, saved_clearing_reg);

  /* Clear full registers.  */

  if (TARGET_HAVE_FPCXT_CMSE)
    {
      int i, j, k, nb_regs;
      rtx use_seq, par, reg, set, vunspec;
      rtvec vunspec_vec;
      int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
      auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
      auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);

      for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
	{
	  /* Find next register to clear and exit if none.  */
	  for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
	  if (i > maxregno)
	    break;

	  /* Compute number of consecutive registers to clear.  */
	  for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
	       j++);
	  nb_regs = j - i;

	  /* Create VSCCLRM RTX pattern.  */
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
	  vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
	  vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
					     VUNSPEC_VSCCLRM_VPR);
	  XVECEXP (par, 0, 0) = vunspec;

	  /* Insert VFP register clearing RTX in the pattern.  */
	  start_sequence ();
	  for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
	    {
	      if (!bitmap_bit_p (to_clear_bitmap, j))
		continue;

	      reg = gen_rtx_REG (SFmode, j);
	      set = gen_rtx_SET (reg, const0_rtx);
	      XVECEXP (par, 0, k++) = set;
	      emit_use (reg);
	    }
	  use_seq = get_insns ();
	  end_sequence ();

	  emit_insn_after (use_seq, emit_insn (par));
	}

      /* Get set of core registers to clear.  */
      bitmap_clear (core_regs_bitmap);
      bitmap_set_range (core_regs_bitmap, R0_REGNUM,
			IP_REGNUM - R0_REGNUM + 1);
      bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
		  core_regs_bitmap);
      gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));

      if (bitmap_empty_p (to_clear_core_bitmap))
	return;

      /* Create clrm RTX pattern.  */
      nb_regs = bitmap_count_bits (to_clear_core_bitmap);
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));

      /* Insert core register clearing RTX in the pattern.  */
      start_sequence ();
      for (j = 0, i = minregno; j < nb_regs; i++)
	{
	  if (!bitmap_bit_p (to_clear_core_bitmap, i))
	    continue;

	  reg = gen_rtx_REG (SImode, i);
	  set = gen_rtx_SET (reg, const0_rtx);
	  XVECEXP (par, 0, j++) = set;
	  emit_use (reg);
	}

      /* Insert APSR register clearing RTX in the pattern
	 along with clobbering CC.  */
      vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
      vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
					 VUNSPEC_CLRM_APSR);
      XVECEXP (par, 0, j++) = vunspec;

      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
      XVECEXP (par, 0, j) = clobber;

      use_seq = get_insns ();
      end_sequence ();

      emit_insn_after (use_seq, emit_insn (par));
    }
  else
    {
      /* If not marked for clearing, clearing_reg already does not contain
	 any secret.  */
      if (clearing_regno <= maxregno
	  && bitmap_bit_p (to_clear_bitmap, clearing_regno))
	{
	  emit_move_insn (clearing_reg, const0_rtx);
	  emit_use (clearing_reg);
	  bitmap_clear_bit (to_clear_bitmap, clearing_regno);
	}

      for (regno = minregno; regno <= maxregno; regno++)
	{
	  if (!bitmap_bit_p (to_clear_bitmap, regno))
	    continue;

	  if (IS_VFP_REGNUM (regno))
	    {
	      /* If regno is an even vfp register and its successor is also to
		 be cleared, use vmov.  */
	      if (TARGET_VFP_DOUBLE
		  && VFP_REGNO_OK_FOR_DOUBLE (regno)
		  && bitmap_bit_p (to_clear_bitmap, regno + 1))
		{
		  emit_move_insn (gen_rtx_REG (DFmode, regno),
				  CONST1_RTX (DFmode));
		  emit_use (gen_rtx_REG (DFmode, regno));
		  regno++;
		}
	      else
		{
		  emit_move_insn (gen_rtx_REG (SFmode, regno),
				  CONST1_RTX (SFmode));
		  emit_use (gen_rtx_REG (SFmode, regno));
		}
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
	      emit_use (gen_rtx_REG (SImode, regno));
	    }
	}
    }
}
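/* An illustrative sketch of the mask construction above (this helper is
   hypothetical and not part of GCC).  The negated padding mask is built in
   two 16-bit halves, mirroring a movw/movt instruction pair, before being
   ANDed into the register being cleaned.  In plain C:  */
static uint32_t
example_build_mask_in_halves (uint32_t padding_bits)
{
  uint32_t lo = (~padding_bits) & 0xFFFF;	/* movw-style lower half.  */
  uint32_t hi = (~padding_bits) >> 16;		/* movt-style upper half.  */
  return (hi << 16) | lo;			/* Equals ~padding_bits.  */
}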
/* Clear core and caller-saved VFP registers not used to pass arguments before
   a cmse_nonsecure_call.  Saving, clearing and restoring of VFP callee-saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.  See
   libgcc/config/arm/cmse_nonsecure_call.S.  */

static void
cmse_nonsecure_call_inline_register_clear (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
	  /* frame = VFP regs + FPSCR + VPR.  */
	  unsigned lazy_store_stack_frame_size
	    = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
	  unsigned long callee_saved_mask
	    = ((1 << (LAST_HI_REGNUM + 1)) - 1)
	      & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
	  unsigned address_regnum, regno;
	  unsigned max_int_regno
	    = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
	  unsigned max_fp_regno
	    = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
	  unsigned maxregno
	    = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
	  auto_sbitmap to_clear_bitmap (maxregno + 1);
	  rtx_insn *seq;
	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Mark registers that need to be cleared.  Those that hold a
	     parameter are removed from the set further below.  */
	  bitmap_clear (to_clear_bitmap);
	  bitmap_set_range (to_clear_bitmap, R0_REGNUM,
			    max_int_regno - R0_REGNUM + 1);

	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (!lazy_fpclear)
	    {
	      auto_sbitmap float_bitmap (maxregno + 1);

	      bitmap_clear (float_bitmap);
	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
				max_fp_regno - FIRST_VFP_REGNUM + 1);
	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  address_regnum = REGNO (XEXP (address, 0));
	  if (address_regnum <= max_int_regno)
	    bitmap_clear_bit (to_clear_bitmap, address_regnum);

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      uint64_t to_clear_args_mask;

	      if (VOID_TYPE_P (arg_type))
		continue;

	      function_arg_info arg (arg_type, /*named=*/true);
	      if (!first_param)
		/* ??? We should advance after processing the argument and
		   pass the argument we're advancing past.  */
		arm_function_arg_advance (args_so_far, arg);

	      arg_rtx = arm_function_arg (args_so_far, arg);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_args_mask
		= compute_not_to_clear_mask (arg_type, arg_rtx,
					     REGNO (arg_rtx),
					     &padding_bits_to_clear[0]);
	      if (to_clear_args_mask)
		{
		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
		    {
		      if (to_clear_args_mask & (1ULL << regno))
			bitmap_clear_bit (to_clear_bitmap, regno);
		    }
		}

	      first_param = false;
	    }

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  clearing_reg = XEXP (address, 0);
	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));

	  if (clear_callee_saved)
	    {
	      rtx_insn *push_insn =
		emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
	      /* Disable frame debug info in push because it needs to be
		 disabled for pop (see below).  */
	      RTX_FRAME_RELATED_P (push_insn) = 0;

	      /* Lazy store multiple.  */
	      if (lazy_fpclear)
		{
		  rtx imm;
		  rtx_insn *add_insn;

		  imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
		  add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
						    stack_pointer_rtx, imm));
		  /* If we have the frame pointer, then it will be the
		     CFA reg.  Otherwise, the stack pointer is the CFA
		     reg, so we need to emit a CFA adjust.  */
		  if (!frame_pointer_needed)
		    arm_add_cfa_adjust_cfa_note (add_insn,
						 - lazy_store_stack_frame_size,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
		}
	      /* Save VFP callee-saved registers.  */
	      else
		{
		  vfp_emit_fstmd (D7_VFP_REGNUM + 1,
				  (max_fp_regno - D7_VFP_REGNUM) / 2);
		  /* Disable frame debug info in push because it needs to be
		     disabled for vpop (see below).  */
		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
		}
	    }

	  /* Clear caller-saved registers that leak before doing a non-secure
	     call.  */
	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
				NUM_ARG_REGS, ip_reg, clearing_reg);

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);

	  if (TARGET_HAVE_FPCXT_CMSE)
	    {
	      rtx_insn *last, *pop_insn, *after = insn;

	      start_sequence ();

	      /* Lazy load multiple done as part of libcall in Armv8-M.  */
	      if (lazy_fpclear)
		{
		  rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
		  emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
		  rtx_insn *add_insn =
		    emit_insn (gen_addsi3 (stack_pointer_rtx,
					   stack_pointer_rtx, imm));
		  if (!frame_pointer_needed)
		    arm_add_cfa_adjust_cfa_note (add_insn,
						 lazy_store_stack_frame_size,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		}
	      /* Restore VFP callee-saved registers.  */
	      else
		{
		  int nb_callee_saved_vfp_regs =
		    (max_fp_regno - D7_VFP_REGNUM) / 2;
		  arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
					      nb_callee_saved_vfp_regs,
					      stack_pointer_rtx);
		  /* Disable frame debug info in vpop because the SP adjustment
		     is made using a CFA adjustment note while CFA used is
		     sometimes R7.  This then causes an assert failure in the
		     CFI note creation code.  */
		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
		}

	      arm_emit_multi_reg_pop (callee_saved_mask);
	      pop_insn = get_last_insn ();

	      /* Disable frame debug info in pop because they reset the state
		 of popped registers to what it was at the beginning of the
		 function, before the prologue.  This leads to incorrect state
		 when doing the pop after the nonsecure call for registers
		 that are pushed both in prologue and before the nonsecure
		 call.

		 It also occasionally triggers an assert failure in CFI note
		 creation code when there are two codepaths to the epilogue,
		 one of which does not go through the nonsecure call.
		 Obviously this means that debugging between the push and pop
		 is not reliable.  */
	      RTX_FRAME_RELATED_P (pop_insn) = 0;

	      seq = get_insns ();
	      last = get_last_insn ();
	      end_sequence ();

	      emit_insn_after (seq, after);

	      /* Skip the pop we have just inserted after the nonsecure call;
		 we know it does not contain a nonsecure call.  */
	      insn = last;
	    }
	}
    }
}
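/* An illustrative sketch of the LSB-clearing idiom emitted above (this
   helper is hypothetical and not part of GCC).  The address is shifted right
   then left by one instead of using bic, so no extra register is needed on
   Thumb-1:  */
static unsigned int
example_clear_lsb (unsigned int addr)
{
  return (addr >> 1) << 1;	/* Same result as addr & ~1u.  */
}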
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
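/* Illustrative example (the assembly shown is an approximation, not
   generated output): the rewrite above turns a sequence along the lines of

       movs  r1, r0        @ flags unrelated to the later test
       ...
       cmp   r1, #0 ; beq  .L1

   into

       subs  r1, r0, #0    @ same value, but sets N/Z from the result
       ...
       beq   .L1

   Subtracting zero preserves the value, which is why the SET source is
   rewritten as (minus src 0): the condition codes it sets make the explicit
   comparison against zero in the following conditional branch redundant.  */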
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_inline_register_clear ();

  /* We cannot run the Thumb passes for thunks because there is no CFG.  */
  if (cfun->is_thunk)
    ;
  else if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it
     should no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
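/* For instance (illustrative, assuming a typical epilogue): with an SP base
   and writeback outside an interrupt handler the pattern buffer assembles to
   something like "pop {r4, r5, pc}", while a non-SP base without writeback
   yields "ldm r7, {r4, r5}"; returning from an interrupt appends "^".  */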
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    p += sprintf (&pattern[p], ", d%d", base + i);

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
/* Returns true if -mcmse has been passed and the function pointed to by
   'addr' has the cmse_nonsecure_call attribute and returns false
   otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && SYMBOL_REF_P (addr)
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register (NULL_RTX, false /*compute_now*/);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_FDPIC)
    {
      rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC
	 clobber is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
/* Emit a movw/movt pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
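/* An illustrative sketch of the immediate case above (this helper is
   hypothetical and not part of GCC).  The constant is split into a low half
   written by a zero-extending move and, when nonzero, a high half inserted
   into bits 16-31, mirroring a movw/movt pair:  */
static unsigned int
example_movw_movt_split (unsigned int val)
{
  unsigned int lo = val & 0x0000ffff;	    /* movw rd, #lo  */
  unsigned int hi = (val >> 16) & 0xffff;   /* movt rd, #hi (skipped if 0)  */
  return (hi << 16) | lo;		    /* Reassembles VAL.  */
}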
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);
      const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (can_ldrd
		  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (can_ldrd);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (can_ldrd);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      gcc_assert (can_ldrd);
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (can_ldrd
		      && (TARGET_THUMB2
			  || !CONST_INT_P (otherops[2])
			  || (INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (emit)
			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (can_ldrd
		  && (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256)))
		{
		  if (emit)
		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     overlap.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%1, %M0", operands);
	    }
	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (can_ldrd
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
			}
		    }
		  else
		    {
		      if (CONST_INT_P (otherops[2]))
			{
			  if (emit)
			    {
			      if (!(const_ok_for_arm (INTVAL (otherops[2]))))
				output_asm_insn ("sub%?\t%0, %1, #%n2",
						 otherops);
			      else
				output_asm_insn ("add%?\t%0, %1, %2",
						 otherops);
			    }
			}
		      else if (emit)
			output_asm_insn ("add%?\t%0, %1, %2", otherops);
		    }
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (can_ldrd)
		return "ldrd%?\t%0, [%1]";

	      return "ldmia%?\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		}
	      if (count)
		*count = 2;
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      /* For TARGET_ARM the first source register of an STRD
	 must be even.  This is usually the case for double-word
	 values but user assembly constraints can force an odd
	 starting register.  */
      bool allow_strd = TARGET_LDRD
			&& !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%?\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than strd can handle,
	     fix these up with a pair of str.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL (otherops[2]) <= -256
		  || INTVAL (otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (allow_strd
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldmia%?\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%?\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
	      || mode == SFmode
	      || mode == DFmode
	      || mode == HImode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : sp ? "32" : "16",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM.  GCC RTL defines element
   ordering based on in-memory order.  This can be different from
   the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register
   element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case PLUS:
      if (GET_CODE (addr) == PLUS)
	addr = XEXP (addr, 0);
      /* Fall through.  */
    case LABEL_REF:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
		  sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
		else
		  sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
	    else
	      sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case E_EImode:
	case E_OImode:
	  return 8;
	case E_CImode:
	  return 12;
	case E_XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
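/* An illustrative sketch of the chunking above (this helper is hypothetical
   and not part of GCC).  One add/sub is emitted per 8-bit chunk of N, where
   each chunk starts at an even bit position, matching the ARM immediate
   encoding of an 8-bit value rotated by an even amount:  */
static int
example_count_immediate_chunks (unsigned int n)
{
  int i, chunks = 0;

  for (i = 0; i < 32; i += 2)
    if (n & (3u << i))		/* A chunk starts here...  */
      {
	chunks++;		/* ...covering n & (255 << i)...  */
	i += 6;			/* ...so skip the rest of that byte.  */
      }
  return chunks;	/* E.g. 0x10F gives 2 chunks: 0x0F and 0x100.  */
}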
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
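/* An illustrative sketch of the MULT case above (this helper is hypothetical
   and not part of GCC).  A multiplication by a power of two is converted to
   an LSL amount, which is why only power-of-two constants are accepted:  */
static int
example_mult_to_lsl_amount (unsigned int factor)
{
  int amount = 0;

  if (factor == 0 || (factor & (factor - 1)) != 0)
    return -1;			/* Not a power of two: no LSL form.  */
  while ((factor >>= 1) != 0)
    amount++;			/* exact_log2: 8 -> 3, i.e. x*8 == x<<3.  */
  return amount;
}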
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.cc.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
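/* As a sketch, the five bytes 'a', '"', 'b', '\t', '\0' would be emitted
   as

        .ascii  "a\"b\011\000"

   with a break onto a fresh .ascii directive whenever a line exceeds
   MAX_ASCII_LEN characters.  */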
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (reg_needs_saving_p (reg))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
               || flag_stack_clash_protection)
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 - r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
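/* Worked illustration: a normal ARM-state function that uses r4 and r6
   and needs LR saved ends up with the mask
   (1 << 4) | (1 << 6) | (1 << LR_REGNUM), i.e. 0x4050 with LR being
   register 14.  The exact result depends on the target flags tested
   above.  */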
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_VFP_BASE)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
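/* Worked example: a hard-float function clobbering d8-d11 needs
   4 * 8 = 32 bytes.  On a pre-Armv6 core a run of exactly two D registers
   is padded to three by the ARM10 VFPr1 workaround above, so saving d8-d9
   costs 24 bytes rather than 16.  */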
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If SIMPLE_RETURN is true,
   then do not output the epilogue, because it has already been emitted in RTL.

   Note: do not forget to update the length attribute of the corresponding
   insn pattern when changing assembly output (eg. the length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char *return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && !IS_CMSE_ENTRY (func_type)
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |=   (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5t && TARGET_ARM)
                sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                }
            }
          /* For interrupt returns we have to use an LDM rather than
             a POP so that we can use the exception return variant.  */
          else if (IS_INTERRUPT (func_type))
            sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
          else
            sprintf (instr, "pop%s\t{", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          gcc_assert (arm_arch5t || arm_arch4t);
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          if (IS_CMSE_ENTRY (func_type))
            {
              /* For Armv8.1-M, this is cleared as part of the CLRM instruction
                 emitted by cmse_nonsecure_entry_clear_before_return () and the
                 VSTR/VLDR instructions in the prologue and epilogue.  */
              if (!TARGET_HAVE_FPCXT_CMSE)
                {
                  /* Check if we have to clear the 'GE bits' which is only used
                     if parallel add and subtraction instructions are
                     available.  */
                  if (TARGET_INT_SIMD)
                    snprintf (instr, sizeof (instr),
                              "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
                  else
                    snprintf (instr, sizeof (instr),
                              "msr%s\tAPSR_nzcvq, %%|lr", conditional);

                  output_asm_insn (instr, & operand);
                  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR
                     takes care of it.  */
                  if (TARGET_HARD_FLOAT)
                    {
                      /* Clear the cumulative exception-status bits (0-4,7) and
                         the condition code bits (28-31) of the FPSCR.  We need
                         to remember to clear the first scratch register used
                         (IP) and save and restore the second (r4).

                         Important note: the length of the
                         thumb2_cmse_entry_return insn pattern must account for
                         the size of the below instructions.  */
                      output_asm_insn ("push\t{%|r4}", & operand);
                      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
                      output_asm_insn ("movw\t%|r4, #65376", & operand);
                      output_asm_insn ("movt\t%|r4, #4095", & operand);
                      output_asm_insn ("and\t%|ip, %|r4", & operand);
                      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
                      output_asm_insn ("pop\t{%|r4}", & operand);
                      output_asm_insn ("mov\t%|ip, %|lr", & operand);
                    }
                }
              snprintf (instr, sizeof (instr), "bxns\t%%|lr");
            }
          /* Use bx if it's available.  */
          else if (arm_arch5t || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
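/* Typical sequences produced here (illustrative, hand-written):

        pop     {r4, r5, pc}            @ plain return via the register list
        ldmfd   sp!, {r0-r7, pc}^       @ ISR return, also restores the CPSR
        bx      lr                      @ interworking return (Armv4t+)
        bxns    lr                      @ CMSE non-secure entry return  */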
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
                                    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
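/* For a function foo with the cmse_nonsecure_entry attribute this emits,
   roughly (sketch; the exact directives vary by target):

        .global __acle_se_foo
        .type   __acle_se_foo, %function
        .type   foo, %function
   foo:
   __acle_se_foo:

   so the linker can create a secure gateway veneer for foo.  */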
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is given by (pc[-3] & 0x00ffffff).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
               (HOST_WIDE_INT) crtl->args.size,
               crtl->args.pretend_args_size,
               (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* Initial stack push, which also allocates all the space.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            /* Subsequent pair stores use offset addressing.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);
        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;
}
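/* For saving {r4, r5, r6, r7, lr} (five registers, so one initial STR)
   the emitted sequence has roughly the shape (hand-written sketch):

        str     r4, [sp, #-20]!
        strd    r5, r6, [sp, #4]
        strd    r7, lr, [sp, #12]  */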
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: More efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
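/* The pop_multi built here corresponds to an epilogue instruction such as
   (illustrative):

        vldm    sp!, {d8-d11}   @ base reg += 32, four D registers reloaded  */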
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded with an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the
               registers to be loaded are generated in above given LDRD
               pattern, and the pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     the number of registers is even AND return_in_pc is true, the last
     register is popped using LDR.  It can be PC as well.  Hence, adjust the
     stack first and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped,
               and we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);
            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
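/* E.g. a function returning long long has a DImode return rtx, so this
   returns 8: the value comes back in r0 and r1.  */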
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!crtl->is_leaf
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know whether r3 will be available, because an indirect
   tail call happens in this particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx_insn *call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (reg_needs_saving_p (regno))
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_VFP_BASE)
        saved += arm_get_vfp_saved_size ();

      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
         nonsecure entry functions with VSTR/VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
        saved += 4;
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
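
/* Worked example (illustrative, not from the original sources): using the
   frame from the example above (saved_args = 0, outgoing_args = 24),
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   24 - (0 + 4) = 20; with an entirely empty frame the same formula gives
   the -4 documented in the case above.  */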
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	  true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  if (TARGET_REALLY_IWMMXT)
    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
      if (reg_needs_saving_p (reg))
	{
	  insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = gen_rtx_MEM (V2SImode, insn);
	  insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  saved_size += 8;
	}

  if (TARGET_VFP_BASE)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
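
/* Example (illustrative, not from the original sources): if only d8-d11
   (s16-s23) need saving, the scan above skips s0-s15 and finds the
   contiguous run starting at s16; when the run ends, vfp_emit_fstmd is
   called once with start_reg = s16 and a count of 4 D registers, producing
   a single store-multiple of d8-d11.  */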
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
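
/* Assuming the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 (from
   defaults.h), PROBE_INTERVAL is 4096; the residual-offset checks below
   (rem > 4095, or rem > 255 for Thumb-2) mirror the immediate ranges of the
   str instruction used for probing.  */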
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
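
/* Worked example (illustrative, not from the original sources): with
   first = 0 and size = 3 * PROBE_INTERVAL + 100, the second branch above
   probes at PROBE_INTERVAL, 2 * PROBE_INTERVAL and 3 * PROBE_INTERVAL below
   SP, leaving rem = 100, so the final probe lands exactly at SP - size.  */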
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
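
/* For reference, the loop emitted above looks like this (register numbers
   and label name illustrative, #4096 standing in for PROBE_INTERVAL):

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
   */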
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = (IS_NESTED (func_type)
		&& ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		    || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
			 || flag_stack_clash_protection)
			&& !df_regs_ever_live_p (LR_REGNUM)
			&& arm_r3_live_at_start_p ())));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);

	  saved_regs += 4;

	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	}
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
  if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
    {
      saved_regs += 4;
      insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
						GEN_INT (FPCXTNS_ENUM)));
      rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx, -4));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
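
  /* The mask arithmetic above, worked through (illustrative): for
     args_to_push = 8 (two anonymous argument words),
     (0xf0 >> (8 / 4)) & 0xf == 0xc, i.e. the bits for r2 and r3, so
     emit_multi_reg_push saves exactly the last two argument registers.  */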
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating extra
     push of IP (needed when frame is needed and frame layout if apcs),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    arm_emit_probe_stack_range (get_stack_check_protect (),
					size - get_stack_check_protect (),
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (get_stack_check_protect (), size,
				    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	emit_insn (gen_stack_tie (stack_pointer_rtx,
				  hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask, NULL_RTX);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
   Letters previously used, but now deprecated/obsolete: sWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
   If CODE is 'V', then the operand must be a CONST_INT representing
   the bits to preserve in the modified register (Rd) of a BFI or BFC
   instruction: print out both the width and lsb (shift) fields.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;

    /* %# is a "break" sequence. It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
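
      /* Illustration (not from the original sources): for a DImode value
	 held in {r4, r5} on a little-endian target, %Q prints r4 (least
	 significant half), %R prints r5 (most significant half), and %H
	 prints r5 (the higher-numbered register) regardless of
	 endianness.  */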
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 'V':
      {
	/* Output the LSB (shift) and width for a bitmask instruction
	   based on a literal mask.  The LSB is printed first,
	   followed by the width.

	   Eg. For 0b1...1110001, the result is #1, #3.  */
	if (!CONST_INT_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    break;
	  }

	unsigned HOST_WIDE_INT val
	  = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
	int lsb = exact_log2 (val & -val);
	asm_fprintf (stream, "#%d, #%d", lsb,
		     (exact_log2 (val + (val & -val)) - lsb));
      }
      return;

    case 's':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;
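
    /* Illustration (not from the original sources): a 16-byte vector in q1
       (d2/d3, i.e. s4-s7) prints as d2 for %e and d3 for %f; a 32-byte
       pair-structure starting at the same register prints as q1 for %e and
       q2 for %f.  */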
    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP( XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
       rtx_code the memory operands output looks like following.
       1.  [Rn], #+/-<imm>
       2.  [Rn, #+/-<imm>]!
       3.  [Rn, #+/-<imm>]
       4.  [Rn].  */
    case 'E':
      {
	rtx addr;
	rtx postinc_reg = NULL;
	unsigned inc_val = 0;
	enum rtx_code code;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	code = GET_CODE (addr);
	if (code == POST_INC || code == POST_DEC || code == PRE_INC
	    || code == PRE_DEC)
	  {
	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
	    inc_val = GET_MODE_SIZE (GET_MODE (x));
	    if (code == POST_INC || code == POST_DEC)
	      asm_fprintf (stream, "], #%s%d",(code == POST_INC)
					      ? "": "-", inc_val);
	    else
	      asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
					       ? "": "-", inc_val);
	  }
	else if (code == POST_MODIFY || code == PRE_MODIFY)
	  {
	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
	    postinc_reg = XEXP (XEXP (addr, 1), 1);
	    if (postinc_reg && CONST_INT_P (postinc_reg))
	      {
		if (code == POST_MODIFY)
		  asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
		else
		  asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
	      }
	  }
	else if (code == PLUS)
	  {
	    rtx base = XEXP (addr, 0);
	    rtx index = XEXP (addr, 1);

	    gcc_assert (REG_P (base) && CONST_INT_P (index));

	    HOST_WIDE_INT offset = INTVAL (index);
	    asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
	  }
	else
	  {
	    gcc_assert (REG_P (addr));
	    asm_fprintf (stream, "[%r]",REGNO (addr));
	  }
      }
      return;

    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
	gcc_assert (CONST_DOUBLE_P (x));
	int result;
	result = vfp3_const_double_for_fract_bits (x);
	if (result == 0)
	  result = vfp3_const_double_for_bits (x);
	fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as in index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      {
		asm_fprintf (stream, "[%r, %s%r",
			     REGNO (base), is_minus ? "-" : "",
			     REGNO (XEXP (index, 0)));
		arm_print_operand (stream, index, 'S');
		fputs ("]", stream);
		break;
	      }

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	  else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
	    asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
	  else
	    asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  /* References to weak symbols cannot be resolved locally:
	     they may be overridden by a non-weak definition at link
	     time.  */
	  if (!arm_pic_data_is_text_relative
	      || (SYMBOL_REF_P (x)
		  && (!SYMBOL_REF_LOCAL_P (x)
		      || (SYMBOL_REF_DECL (x)
			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
		      || (SYMBOL_REF_FUNCTION_P (x)
			  && !arm_fdpic_local_funcdesc_p (x)))))
	    {
	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
		fputs ("(GOTFUNCDESC)", asm_out_file);
	      else
		fputs ("(GOT)", asm_out_file);
	    }
	  else
	    {
	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
		fputs ("(GOTOFFFUNCDESC)", asm_out_file);
	      else
		{
		  bool is_readonly;

		  if (!TARGET_FDPIC
		      || arm_is_segment_info_known (x, &is_readonly))
		    fputs ("(GOTOFF)", asm_out_file);
		  else
		    fputs ("(GOT)", asm_out_file);
		}
	    }
	}

      /* For FDPIC we also have to mark symbol for .data section.  */
      if (TARGET_FDPIC
	  && !making_const_table
	  && SYMBOL_REF_P (x)
	  && SYMBOL_REF_FUNCTION_P (x))
	fputs ("(FUNCDESC)", asm_out_file);

      fputc ('\n', asm_out_file);
      return true;
    }
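
  /* Example output (illustrative, not from the original sources): a PIC
     reference to a global symbol placed in a constant table is emitted as
     "\t.word\tfoo(GOT)", while text-relative local data instead gets
     "\t.word\tfoo(GOTOFF)".  */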
  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_real
	      (*CONST_DOUBLE_REAL_VALUE (elt),
	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
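
/* Illustration of what states 3/4 achieve (not from the original sources):
   a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   deleting the branch and conditionalising the skipped instruction.  */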
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case E_CC_NZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_NVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	default: return ARM_NV;
	}

    case E_CC_Bmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case E_CC_ADCmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_RSBmode:
    case E_CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}

/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  while (1)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.cc assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
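
/* Illustration (not from the original sources): two adjacent COND_EXEC
   insns under EQ followed by one under NE can be merged into a single
   "itte eq" block; arm_condexec_mask records the then/else pattern and
   arm_condexec_count the number of insns still to be output.  */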
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      rtx label = 0;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5t)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.cc assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
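
/* Worked example, added for exposition and guarded out of the build:
   how the mask-to-suffix expansion above behaves for a hand-picked mask.
   The values of MASK and MASKLEN below are hypothetical stand-ins for
   arm_condexec_mask and arm_condexec_masklen.  */
#if 0
static void
demo_it_suffix (void)
{
  char buff[5];
  int n;
  unsigned mask = 0x5;  /* hypothetical arm_condexec_mask, binary 0101.  */
  int masklen = 3;      /* hypothetical arm_condexec_masklen.  */

  for (n = 0; n < masklen; n++)
    buff[n] = (mask & (1 << n)) ? 't' : 'e';
  buff[n] = 0;
  /* buff is now "tet": the first insn of a block is always a "then",
     so the opcode printed above would be "itet" followed by the current
     condition, e.g. "itet eq".  */
}
#endif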
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (IS_VPR_REGNUM (regno))
    return CEIL (GET_MODE_SIZE (mode), 2);

  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
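
/* Worked example, added for exposition and guarded out of the build:
   for the core registers the count reduces to a ceiling division by
   UNITS_PER_WORD (4 on ARM), so an 8-byte DFmode value needs two
   registers and a 2-byte HImode value needs one.  */
#if 0
static unsigned int
demo_num_words (unsigned int mode_size_bytes)
{
  const unsigned int units_per_word = 4;  /* UNITS_PER_WORD on ARM.  */
  return (mode_size_bytes + units_per_word - 1) / units_per_word;
}
#endif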
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_VFP_BASE
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (IS_VPR_REGNUM (regno))
    return mode == HImode
      || mode == V16BImode
      || mode == V8BImode
      || mode == V4BImode;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
    {
      if (mode == DFmode || mode == DImode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode || mode == BFmode || mode == HImode
	  || mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
      if (TARGET_HAVE_MVE)
	return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
		|| (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  The same restriction applies for MVE
     in order to support Armv8.1-M Mainline instructions.
     Do not allow very large Neon structure opaque modes in general
     registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
	return true;

      return !((TARGET_LDRD || TARGET_CDE)
	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
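
/* Worked example, added for exposition: under TARGET_LDRD an 8-byte
   DImode value in the core registers must start at an even register
   (r0, r2, ...) so that a single ldrd/strd can access it; the final
   general-register test above therefore rejects any mode with
   GET_MODE_SIZE (mode) > 4 at an odd regno.  */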
/* Implement TARGET_MODES_TIEABLE_P.  */

static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if ((TARGET_NEON
       && (VALID_NEON_DREG_MODE (mode1)
	   || VALID_NEON_QREG_MODE (mode1)
	   || VALID_NEON_STRUCT_MODE (mode1))
       && (VALID_NEON_DREG_MODE (mode2)
	   || VALID_NEON_QREG_MODE (mode2)
	   || VALID_NEON_STRUCT_MODE (mode2)))
      || (TARGET_HAVE_MVE
	  && (VALID_MVE_MODE (mode1)
	      || VALID_MVE_STRUCT_MODE (mode1))
	  && (VALID_MVE_MODE (mode2)
	      || VALID_MVE_STRUCT_MODE (mode2))))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (IS_VPR_REGNUM (regno))
    return VPR_REG;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type:
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type.

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
      case EXCESS_PRECISION_TYPE_FLOAT16:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
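
/* Illustrative user-level example, added for exposition and guarded out
   of the build: with the ARMv8.2-A FP16 instructions (TARGET_VFP_FP16INST)
   the addition below may be evaluated directly in _Float16; without them
   each operand is promoted, the addition is done in single precision and
   the result is truncated back, matching
   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT.  */
#if 0
_Float16
demo_f16_add (_Float16 a, _Float16 b)
{
  return a + b;
}
#endif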
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
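
/* Worked example, added for exposition: decomposing a copy of the
   register pair {d1, d2} into {d2, d3} in forward order would overwrite
   d2 (still a source) before it is read; since the ranges overlap and
   REGNO (operands[0]) > REGNO (operands[1]), the second loop above emits
   the component moves in reverse, d3 <- d2 first and then d2 <- d1.  */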
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
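
/* Worked example, added for exposition: combining {d1, d0} into q0,
   i.e. src2 == dest and src1 == dest + halfregs, is the reversed case
   above.  Two plain moves would clobber each other, so a single two-set
   PARALLEL is emitted instead, which matches the vswp pattern.  */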
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
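
/* Worked example, added for exposition and guarded out of the build:
   number_of_first_bit_set (0x58) is 3, since 0x58 is binary 01011000 and
   the lowest set bit is bit 3.  A portable equivalent of ctz_hwi for
   nonzero arguments:  */
#if 0
static int
demo_ctz (unsigned mask)
{
  int n = 0;
  while ((mask & 1) == 0)
    {
      mask >>= 1;
      n++;
    }
  return n;  /* Undefined for mask == 0, as for ctz_hwi.  */
}
#endif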
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
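
/* Worked example, added for exposition and guarded out of the build:
   the mask-walking idiom used twice above clears the lowest set bit on
   each iteration, so the loop counter ends up holding the number of
   registers described.  For MASK == 0x90 (r4 and r7) it runs twice.  */
#if 0
static int
demo_mask_walk (unsigned long mask)
{
  int i;
  for (i = 0; mask; ++i, mask &= mask - 1)
    ;  /* ctz_hwi (mask) names the register handled in this iteration.  */
  return i;  /* demo_mask_walk (0x90) == 2.  */
}
#endif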
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
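
/* Worked example, added for exposition: for mask == 0x8007 (r0, r1, r2
   and the PC) with none of the interworking/backtrace/EH/CMSE conditions
   holding, the code above prints

	pop	{r0, r1, r2, pc}

   whereas under TARGET_INTERWORK the PC is not popped directly and
   thumb_exit is used to return via BX instead.  */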
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
	  if (!TARGET_HAVE_FPCXT_CMSE)
	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
			 reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    --pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in cmse_nonsecure_entry_clear_before_return ().  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
	 by cmse_nonsecure_entry_clear_before_return ().  */
      if (!TARGET_HAVE_FPCXT_CMSE)
	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
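
/* Worked example, added for exposition (one plausible path, not an
   exhaustive description): in a void function compiled with
   TARGET_INTERWORK whose return address is on the stack, r0-r2 are
   available, the return address is matched with r0, and the emitted
   epilogue is

	pop	{r0}
	bx	r0
*/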
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NZmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
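
/* Worked examples, added for exposition and guarded out of the build:
   the test above accepts exactly an 8-bit constant shifted left.  */
#if 0
static int
demo_shiftable (void)
{
  return thumb_shiftable_const (0xff000000u)  /* accepted: 0xff << 24.  */
	 && !thumb_shiftable_const (0x1ffu);  /* rejected: needs 9 bits.  */
}
#endif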
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
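
/* Worked arithmetic, added for exposition: an unconditional Thumb-1
   branch reaches -2048..2046 bytes.  In the worst case every 2-byte insn
   drags a 4-byte literal-pool entry with it, so insn lengths summing to
   func_size can occupy up to 3 * func_size bytes of code; requiring
   func_size * 3 >= 2048 therefore flags the functions whose body might
   grow beyond that branch range.  */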
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
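
/* Worked arithmetic, added for exposition: with amount == 512 and three
   usable free low registers (n_free == 3), amount - n_free * 4 == 500,
   which is below 512, so the first test returns (512 - 508) / 4 == 1:
   pushing one extra register shrinks the remaining adjustment to 508
   bytes, which fits a single immediate.  */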
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		       next_hi_reg--)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;

  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	  0   sub   SP, #16         Reserve space for 4 registers.
	  2   push  {R7}            Push low registers.
	  4   add   R7, SP, #20     Get the stack pointer before the push.
	  6   str   R7, [SP, #8]    Store the stack pointer
				    (before reserving the space).
	  8   mov   R7, PC          Get hold of the start of this code + 12.
	 10   str   R7, [SP, #16]   Store it.
	 12   mov   R7, FP          Get hold of the current frame pointer.
	 14   str   R7, [SP, #4]    Store it.
	 16   mov   R7, LR          Get hold of the current return address.
	 18   str   R7, [SP, #12]   Store it.
	 20   add   R7, SP, #16     Point at the start of the
				    backtrace structure.
	 22   mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  lr_needs_saving = false;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers.  Such kind of stash may clobber the
	 use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
	 saved; but if the function examines its own return address then
	 the value is still live and we need to avoid using it.  */
      bool return_addr_live
	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			   LR_REGNUM);
      if (lr_needs_saving || return_addr_live)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	      /* If the return address is not live at this point, we
		 can add LR to the list of registers that we can use
		 for pushes.  */
	      if (!return_addr_live)
		pushable_regs |= 1 << LR_REGNUM;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask, NULL_RTX);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("%<-fstack-check=specific%> for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert(regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
27288 /* Clear caller saved registers not used to pass return values and leaked
27289 condition flags before exiting a cmse_nonsecure_entry function. */
27292 cmse_nonsecure_entry_clear_before_return (void)
27294 bool clear_vfpregs
= TARGET_HARD_FLOAT
|| TARGET_HAVE_FPCXT_CMSE
;
27295 int regno
, maxregno
= clear_vfpregs
? LAST_VFP_REGNUM
: IP_REGNUM
;
27296 uint32_t padding_bits_to_clear
= 0;
27297 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
27298 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
27301 bitmap_clear (to_clear_bitmap
);
27302 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
27303 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
27305 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27309 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
27311 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
27313 if (!TARGET_HAVE_FPCXT_CMSE
)
27315 /* Make sure we don't clear the two scratch registers used to clear
27316 the relevant FPSCR bits in output_return_instruction. */
27317 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
27318 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
27319 emit_use (gen_rtx_REG (SImode
, 4));
27320 bitmap_clear_bit (to_clear_bitmap
, 4);
27324 /* If the user has defined registers to be caller saved, these are no longer
27325 restored by the function before returning and must thus be cleared for
27326 security purposes. */
27327 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
27329 /* We do not touch registers that can be used to pass arguments as per
27330 the AAPCS, since these should never be made callee-saved by user
27332 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
27334 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
27336 if (!callee_saved_reg_p (regno
)
27337 && (!IN_RANGE (regno
, FIRST_VFP_REGNUM
, LAST_VFP_REGNUM
)
27338 || TARGET_HARD_FLOAT
))
27339 bitmap_set_bit (to_clear_bitmap
, regno
);
27342 /* Make sure we do not clear the registers used to return the result in. */
27343 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
27344 if (!VOID_TYPE_P (result_type
))
27346 uint64_t to_clear_return_mask
;
27347 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
27349 /* No need to check that we return in registers, because we don't
27350 support returning on stack yet. */
27351 gcc_assert (REG_P (result_rtl
));
27352 to_clear_return_mask
27353 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
27354 &padding_bits_to_clear
);
27355 if (to_clear_return_mask
)
27357 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
27358 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
27360 if (to_clear_return_mask
& (1ULL << regno
))
27361 bitmap_clear_bit (to_clear_bitmap
, regno
);
27366 if (padding_bits_to_clear
!= 0)
27368 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
27369 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
27371 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27372 returning a composite type, which only uses r0. Let's make sure that
27373 r1-r3 is cleared too. */
27374 bitmap_clear (to_clear_arg_regs_bitmap
);
27375 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
27376 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
27379 /* Clear full registers that leak before returning. */
27380 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
27381 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
27382 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
27386 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27387 POP instruction can be generated. LR should be replaced by PC. All
27388 the checks required are already done by USE_RETURN_INSN (). Hence,
27389 all we really need to check here is if single register is to be
27390 returned, or multiple register return. */
27392 thumb2_expand_return (bool simple_return
)
27395 unsigned long saved_regs_mask
;
27396 arm_stack_offsets
*offsets
;
27398 offsets
= arm_get_frame_offsets ();
27399 saved_regs_mask
= offsets
->saved_regs_mask
;
27401 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27402 if (saved_regs_mask
& (1 << i
))
27405 if (!simple_return
&& saved_regs_mask
)
27407 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27408 functions or adapt code to handle according to ACLE. This path should
27409 not be reachable for cmse_nonsecure_entry functions though we prefer
27410 to assert it for now to ensure that future code changes do not silently
27411 change this behavior. */
27412 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27415 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27416 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27417 rtx addr
= gen_rtx_MEM (SImode
,
27418 gen_rtx_POST_INC (SImode
,
27419 stack_pointer_rtx
));
27420 set_mem_alias_set (addr
, get_frame_alias_set ());
27421 XVECEXP (par
, 0, 0) = ret_rtx
;
27422 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
27423 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27424 emit_jump_insn (par
);
27428 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27429 saved_regs_mask
|= (1 << PC_REGNUM
);
27430 arm_emit_multi_reg_pop (saved_regs_mask
);
27435 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27436 cmse_nonsecure_entry_clear_before_return ();
27437 emit_jump_insn (simple_return_rtx
);
27442 thumb1_expand_epilogue (void)
27444 HOST_WIDE_INT amount
;
27445 arm_stack_offsets
*offsets
;
27448 /* Naked functions don't have prologues. */
27449 if (IS_NAKED (arm_current_func_type ()))
27452 offsets
= arm_get_frame_offsets ();
27453 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27455 if (frame_pointer_needed
)
27457 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27458 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27460 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27462 gcc_assert (amount
>= 0);
27465 emit_insn (gen_blockage ());
27468 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27469 GEN_INT (amount
)));
27472 /* r3 is always free in the epilogue. */
27473 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27475 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27476 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27480 /* Emit a USE (stack_pointer_rtx), so that
27481 the stack adjustment will not be deleted. */
27482 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27484 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27485 emit_insn (gen_blockage ());
27487 /* Emit a clobber for each insn that will be restored in the epilogue,
27488 so that flow2 will get register lifetimes correct. */
27489 for (regno
= 0; regno
< 13; regno
++)
27490 if (reg_needs_saving_p (regno
))
27491 emit_clobber (gen_rtx_REG (SImode
, regno
));
27493 if (! df_regs_ever_live_p (LR_REGNUM
))
27494 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27496 /* Clear all caller-saved regs that are not used to return. */
27497 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27498 cmse_nonsecure_entry_clear_before_return ();
27501 /* Epilogue code for APCS frame. */
27503 arm_expand_epilogue_apcs_frame (bool really_return
)
27505 unsigned long func_type
;
27506 unsigned long saved_regs_mask
;
27509 int floats_from_frame
= 0;
27510 arm_stack_offsets
*offsets
;
27512 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27513 func_type
= arm_current_func_type ();
27515 /* Get frame offsets for ARM. */
27516 offsets
= arm_get_frame_offsets ();
27517 saved_regs_mask
= offsets
->saved_regs_mask
;
27519 /* Find the offset of the floating-point save area in the frame. */
27521 = (offsets
->saved_args
27522 + arm_compute_static_chain_stack_bytes ()
27525 /* Compute how many core registers saved and how far away the floats are. */
27526 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27527 if (saved_regs_mask
& (1 << i
))
27530 floats_from_frame
+= 4;
27533 if (TARGET_VFP_BASE
)
27536 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27538 /* The offset is from IP_REGNUM. */
27539 int saved_size
= arm_get_vfp_saved_size ();
27540 if (saved_size
> 0)
27543 floats_from_frame
+= saved_size
;
27544 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27545 hard_frame_pointer_rtx
,
27546 GEN_INT (-floats_from_frame
)));
27547 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27548 ip_rtx
, hard_frame_pointer_rtx
);
27551 /* Generate VFP register multi-pop. */
27552 start_reg
= FIRST_VFP_REGNUM
;
27554 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27555 /* Look for a case where a reg does not need restoring. */
27556 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27558 if (start_reg
!= i
)
27559 arm_emit_vfp_multi_reg_pop (start_reg
,
27560 (i
- start_reg
) / 2,
27561 gen_rtx_REG (SImode
,
27566 /* Restore the remaining regs that we have discovered (or possibly
27567 even all of them, if the conditional in the for loop never
27569 if (start_reg
!= i
)
27570 arm_emit_vfp_multi_reg_pop (start_reg
,
27571 (i
- start_reg
) / 2,
27572 gen_rtx_REG (SImode
, IP_REGNUM
));
27577 /* The frame pointer is guaranteed to be non-double-word aligned, as
27578 it is set to double-word-aligned old_stack_pointer - 4. */
27580 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27582 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27583 if (reg_needs_saving_p (i
))
27585 rtx addr
= gen_frame_mem (V2SImode
,
27586 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27588 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27589 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27590 gen_rtx_REG (V2SImode
, i
),
27596 /* saved_regs_mask should contain IP which contains old stack pointer
27597 at the time of activation creation. Since SP and IP are adjacent registers,
27598 we can restore the value directly into SP. */
27599 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27600 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27601 saved_regs_mask
|= (1 << SP_REGNUM
);
27603 /* There are two registers left in saved_regs_mask - LR and PC. We
27604 only need to restore LR (the return address), but to
27605 save time we can load it directly into PC, unless we need a
27606 special function exit sequence, or we are not really returning. */
27608 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27609 && !crtl
->calls_eh_return
)
27610 /* Delete LR from the register mask, so that LR on
27611 the stack is loaded into the PC in the register mask. */
27612 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27614 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27616 num_regs
= bit_count (saved_regs_mask
);
27617 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27620 emit_insn (gen_blockage ());
27621 /* Unwind the stack to just below the saved registers. */
27622 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27623 hard_frame_pointer_rtx
,
27624 GEN_INT (- 4 * num_regs
)));
27626 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27627 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27630 arm_emit_multi_reg_pop (saved_regs_mask
);
27632 if (IS_INTERRUPT (func_type
))
27634 /* Interrupt handlers will have pushed the
27635 IP onto the stack, so restore it now. */
27637 rtx addr
= gen_rtx_MEM (SImode
,
27638 gen_rtx_POST_INC (SImode
,
27639 stack_pointer_rtx
));
27640 set_mem_alias_set (addr
, get_frame_alias_set ());
27641 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27642 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27643 gen_rtx_REG (SImode
, IP_REGNUM
),
27647 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27650 if (crtl
->calls_eh_return
)
27651 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27653 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27655 if (IS_STACKALIGN (func_type
))
27656 /* Restore the original stack pointer. Before prologue, the stack was
27657 realigned and the original stack pointer saved in r0. For details,
27658 see comment in arm_expand_prologue. */
27659 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
27661 emit_jump_insn (simple_return_rtx
);
27664 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27665 function is not a sibcall. */
27667 arm_expand_epilogue (bool really_return
)
27669 unsigned long func_type
;
27670 unsigned long saved_regs_mask
;
27674 arm_stack_offsets
*offsets
;
27676 func_type
= arm_current_func_type ();
27678 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27679 let output_return_instruction take care of instruction emission if any. */
27680 if (IS_NAKED (func_type
)
27681 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27684 emit_jump_insn (simple_return_rtx
);
27688 /* If we are throwing an exception, then we really must be doing a
27689 return, so we can't tail-call. */
27690 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27692 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27694 arm_expand_epilogue_apcs_frame (really_return
);
27698 /* Get frame offsets for ARM. */
27699 offsets
= arm_get_frame_offsets ();
27700 saved_regs_mask
= offsets
->saved_regs_mask
;
27701 num_regs
= bit_count (saved_regs_mask
);
27703 if (frame_pointer_needed
)
27706 /* Restore stack pointer if necessary. */
27709 /* In ARM mode, frame pointer points to first saved register.
27710 Restore stack pointer to last saved register. */
27711 amount
= offsets
->frame
- offsets
->saved_regs
;
27713 /* Force out any pending memory operations that reference stacked data
27714 before stack de-allocation occurs. */
27715 emit_insn (gen_blockage ());
27716 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27717 hard_frame_pointer_rtx
,
27718 GEN_INT (amount
)));
27719 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27721 hard_frame_pointer_rtx
);
27723 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27725 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27729 /* In Thumb-2 mode, the frame pointer points to the last saved
27731 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27734 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27735 hard_frame_pointer_rtx
,
27736 GEN_INT (amount
)));
27737 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27738 hard_frame_pointer_rtx
,
27739 hard_frame_pointer_rtx
);
27742 /* Force out any pending memory operations that reference stacked data
27743 before stack de-allocation occurs. */
27744 emit_insn (gen_blockage ());
27745 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27746 hard_frame_pointer_rtx
));
27747 arm_add_cfa_adjust_cfa_note (insn
, 0,
27749 hard_frame_pointer_rtx
);
27750 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27752 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27757 /* Pop off outgoing args and local frame to adjust stack pointer to
27758 last saved register. */
27759 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27763 /* Force out any pending memory operations that reference stacked data
27764 before stack de-allocation occurs. */
27765 emit_insn (gen_blockage ());
27766 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27768 GEN_INT (amount
)));
27769 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27770 stack_pointer_rtx
, stack_pointer_rtx
);
27771 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27773 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27777 if (TARGET_VFP_BASE
)
27779 /* Generate VFP register multi-pop. */
27780 int end_reg
= LAST_VFP_REGNUM
+ 1;
27782 /* Scan the registers in reverse order. We need to match
27783 any groupings made in the prologue and generate matching
27784 vldm operations. The need to match groups is because,
27785 unlike pop, vldm can only do consecutive regs. */
27786 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27787 /* Look for a case where a reg does not need restoring. */
27788 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27790 /* Restore the regs discovered so far (from reg+2 to
27792 if (end_reg
> i
+ 2)
27793 arm_emit_vfp_multi_reg_pop (i
+ 2,
27794 (end_reg
- (i
+ 2)) / 2,
27795 stack_pointer_rtx
);
27799 /* Restore the remaining regs that we have discovered (or possibly
27800 even all of them, if the conditional in the for loop never
27802 if (end_reg
> i
+ 2)
27803 arm_emit_vfp_multi_reg_pop (i
+ 2,
27804 (end_reg
- (i
+ 2)) / 2,
27805 stack_pointer_rtx
);
27809 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27810 if (reg_needs_saving_p (i
))
27813 rtx addr
= gen_rtx_MEM (V2SImode
,
27814 gen_rtx_POST_INC (SImode
,
27815 stack_pointer_rtx
));
27816 set_mem_alias_set (addr
, get_frame_alias_set ());
27817 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27818 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27819 gen_rtx_REG (V2SImode
, i
),
27821 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27822 stack_pointer_rtx
, stack_pointer_rtx
);
27825 if (saved_regs_mask
)
27828 bool return_in_pc
= false;
27830 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27831 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27832 && !IS_CMSE_ENTRY (func_type
)
27833 && !IS_STACKALIGN (func_type
)
27835 && crtl
->args
.pretend_args_size
== 0
27836 && saved_regs_mask
& (1 << LR_REGNUM
)
27837 && !crtl
->calls_eh_return
)
27839 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27840 saved_regs_mask
|= (1 << PC_REGNUM
);
27841 return_in_pc
= true;
27844 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27846 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27847 if (saved_regs_mask
& (1 << i
))
27849 rtx addr
= gen_rtx_MEM (SImode
,
27850 gen_rtx_POST_INC (SImode
,
27851 stack_pointer_rtx
));
27852 set_mem_alias_set (addr
, get_frame_alias_set ());
27854 if (i
== PC_REGNUM
)
27856 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27857 XVECEXP (insn
, 0, 0) = ret_rtx
;
27858 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
27860 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27861 insn
= emit_jump_insn (insn
);
27865 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27867 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27868 gen_rtx_REG (SImode
, i
),
27870 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27872 stack_pointer_rtx
);
27879 && current_tune
->prefer_ldrd_strd
27880 && !optimize_function_for_size_p (cfun
))
27883 thumb2_emit_ldrd_pop (saved_regs_mask
);
27884 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27885 arm_emit_ldrd_pop (saved_regs_mask
);
27887 arm_emit_multi_reg_pop (saved_regs_mask
);
27890 arm_emit_multi_reg_pop (saved_regs_mask
);
27898 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
27902 rtx dwarf
= NULL_RTX
;
27904 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27906 GEN_INT (amount
)));
27908 RTX_FRAME_RELATED_P (tmp
) = 1;
27910 if (cfun
->machine
->uses_anonymous_args
)
27912 /* Restore pretend args. Refer arm_expand_prologue on how to save
27913 pretend_args in stack. */
27914 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27915 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27916 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27917 if (saved_regs_mask
& (1 << i
))
27919 rtx reg
= gen_rtx_REG (SImode
, i
);
27920 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27923 REG_NOTES (tmp
) = dwarf
;
27925 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27926 stack_pointer_rtx
, stack_pointer_rtx
);
27929 if (IS_CMSE_ENTRY (func_type
))
27931 /* CMSE_ENTRY always returns. */
27932 gcc_assert (really_return
);
27933 /* Clear all caller-saved regs that are not used to return. */
27934 cmse_nonsecure_entry_clear_before_return ();
27936 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27938 if (TARGET_HAVE_FPCXT_CMSE
)
27942 insn
= emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx
,
27943 GEN_INT (FPCXTNS_ENUM
)));
27944 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
27945 plus_constant (Pmode
, stack_pointer_rtx
, 4));
27946 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27947 RTX_FRAME_RELATED_P (insn
) = 1;
27951 if (!really_return
)
27954 if (crtl
->calls_eh_return
)
27955 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27957 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27959 if (IS_STACKALIGN (func_type
))
27960 /* Restore the original stack pointer. Before prologue, the stack was
27961 realigned and the original stack pointer saved in r0. For details,
27962 see comment in arm_expand_prologue. */
27963 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
27965 emit_jump_insn (simple_return_rtx
);
27968 /* Implementation of insn prologue_thumb1_interwork. This is the first
27969 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27972 thumb1_output_interwork (void)
27975 FILE *f
= asm_out_file
;
27977 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27978 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27980 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27982 /* Generate code sequence to switch us into Thumb mode. */
27983 /* The .code 32 directive has already been emitted by
27984 ASM_DECLARE_FUNCTION_NAME. */
27985 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27986 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27988 /* Generate a label, so that the debugger will notice the
27989 change in instruction sets. This label is also used by
27990 the assembler to bypass the ARM code when this function
27991 is called from a Thumb encoded function elsewhere in the
27992 same file. Hence the definition of STUB_NAME here must
27993 agree with the definition in gas/config/tc-arm.c. */
27995 #define STUB_NAME ".real_start_of"
27997 fprintf (f
, "\t.code\t16\n");
27999 if (arm_dllexport_name_p (name
))
28000 name
= arm_strip_name_encoding (name
);
28002 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
28003 fprintf (f
, "\t.thumb_func\n");
28004 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
28009 /* Handle the case of a double word load into a low register from
28010 a computed memory address. The computed address may involve a
28011 register which is overwritten by the load. */
28013 thumb_load_double_from_address (rtx
*operands
)
28021 gcc_assert (REG_P (operands
[0]));
28022 gcc_assert (MEM_P (operands
[1]));
28024 /* Get the memory address. */
28025 addr
= XEXP (operands
[1], 0);
28027 /* Work out how the memory address is computed. */
28028 switch (GET_CODE (addr
))
28031 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28033 if (REGNO (operands
[0]) == REGNO (addr
))
28035 output_asm_insn ("ldr\t%H0, %2", operands
);
28036 output_asm_insn ("ldr\t%0, %1", operands
);
28040 output_asm_insn ("ldr\t%0, %1", operands
);
28041 output_asm_insn ("ldr\t%H0, %2", operands
);
28046 /* Compute <address> + 4 for the high order load. */
28047 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28049 output_asm_insn ("ldr\t%0, %1", operands
);
28050 output_asm_insn ("ldr\t%H0, %2", operands
);
28054 arg1
= XEXP (addr
, 0);
28055 arg2
= XEXP (addr
, 1);
28057 if (CONSTANT_P (arg1
))
28058 base
= arg2
, offset
= arg1
;
28060 base
= arg1
, offset
= arg2
;
28062 gcc_assert (REG_P (base
));
28064 /* Catch the case of <address> = <reg> + <reg> */
28065 if (REG_P (offset
))
28067 int reg_offset
= REGNO (offset
);
28068 int reg_base
= REGNO (base
);
28069 int reg_dest
= REGNO (operands
[0]);
28071 /* Add the base and offset registers together into the
28072 higher destination register. */
28073 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28074 reg_dest
+ 1, reg_base
, reg_offset
);
28076 /* Load the lower destination register from the address in
28077 the higher destination register. */
28078 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28079 reg_dest
, reg_dest
+ 1);
28081 /* Load the higher destination register from its own address
28083 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28084 reg_dest
+ 1, reg_dest
+ 1);
28088 /* Compute <address> + 4 for the high order load. */
28089 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28091 /* If the computed address is held in the low order register
28092 then load the high order register first, otherwise always
28093 load the low order register first. */
28094 if (REGNO (operands
[0]) == REGNO (base
))
28096 output_asm_insn ("ldr\t%H0, %2", operands
);
28097 output_asm_insn ("ldr\t%0, %1", operands
);
28101 output_asm_insn ("ldr\t%0, %1", operands
);
28102 output_asm_insn ("ldr\t%H0, %2", operands
);
28108 /* With no registers to worry about we can just load the value
28110 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28112 output_asm_insn ("ldr\t%H0, %2", operands
);
28113 output_asm_insn ("ldr\t%0, %1", operands
);
28117 gcc_unreachable ();
28124 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28129 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28130 std::swap (operands
[4], operands
[5]);
28132 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28133 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28137 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28138 std::swap (operands
[4], operands
[5]);
28139 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28140 std::swap (operands
[5], operands
[6]);
28141 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28142 std::swap (operands
[4], operands
[5]);
28144 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28145 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28149 gcc_unreachable ();
28155 /* Output a call-via instruction for thumb state. */
28157 thumb_call_via_reg (rtx reg
)
28159 int regno
= REGNO (reg
);
28162 gcc_assert (regno
< LR_REGNUM
);
28164 /* If we are in the normal text section we can use a single instance
28165 per compilation unit. If we are doing function sections, then we need
28166 an entry per section, since we can't rely on reachability. */
28167 if (in_section
== text_section
)
28169 thumb_call_reg_needed
= 1;
28171 if (thumb_call_via_label
[regno
] == NULL
)
28172 thumb_call_via_label
[regno
] = gen_label_rtx ();
28173 labelp
= thumb_call_via_label
+ regno
;
28177 if (cfun
->machine
->call_via
[regno
] == NULL
)
28178 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28179 labelp
= cfun
->machine
->call_via
+ regno
;
28182 output_asm_insn ("bl\t%a0", labelp
);
28186 /* Routines for generating rtl. */
28188 thumb_expand_cpymemqi (rtx
*operands
)
28190 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28191 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28192 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28193 HOST_WIDE_INT offset
= 0;
28197 emit_insn (gen_cpymem12b (out
, in
, out
, in
));
28203 emit_insn (gen_cpymem8b (out
, in
, out
, in
));
28209 rtx reg
= gen_reg_rtx (SImode
);
28210 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28211 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28218 rtx reg
= gen_reg_rtx (HImode
);
28219 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28220 plus_constant (Pmode
, in
,
28222 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28231 rtx reg
= gen_reg_rtx (QImode
);
28232 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28233 plus_constant (Pmode
, in
,
28235 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28242 thumb_reload_out_hi (rtx
*operands
)
28244 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28247 /* Return the length of a function name prefix
28248 that starts with the character 'c'. */
28250 arm_get_strip_length (int c
)
28254 ARM_NAME_ENCODING_LENGTHS
28259 /* Return a pointer to a function's name with any
28260 and all prefix encodings stripped from it. */
28262 arm_strip_name_encoding (const char *name
)
28266 while ((skip
= arm_get_strip_length (* name
)))
28272 /* If there is a '*' anywhere in the name's prefix, then
28273 emit the stripped name verbatim, otherwise prepend an
28274 underscore if leading underscores are being used. */
28276 arm_asm_output_labelref (FILE *stream
, const char *name
)
28281 while ((skip
= arm_get_strip_length (* name
)))
28283 verbatim
|= (*name
== '*');
28288 fputs (name
, stream
);
28290 asm_fprintf (stream
, "%U%s", name
);
28293 /* This function is used to emit an EABI tag and its associated value.
28294 We emit the numerical value of the tag in case the assembler does not
28295 support textual tags. (Eg gas prior to 2.20). If requested we include
28296 the tag name in a comment so that anyone reading the assembler output
28297 will know which tag is being set.
28299 This function is not static because arm-c.cc needs it too. */
28302 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28304 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28305 if (flag_verbose_asm
|| flag_debug_asm
)
28306 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28307 asm_fprintf (asm_out_file
, "\n");
28310 /* This function is used to print CPU tuning information as comment
28311 in assembler file. Pointers are not printed for now. */
28314 arm_print_tune_info (void)
28316 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
28317 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
28318 current_tune
->constant_limit
);
28319 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28320 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
28321 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28322 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
28323 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28324 "prefetch.l1_cache_size:\t%d\n",
28325 current_tune
->prefetch
.l1_cache_size
);
28326 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28327 "prefetch.l1_cache_line_size:\t%d\n",
28328 current_tune
->prefetch
.l1_cache_line_size
);
28329 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28330 "prefer_constant_pool:\t%d\n",
28331 (int) current_tune
->prefer_constant_pool
);
28332 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28333 "branch_cost:\t(s:speed, p:predictable)\n");
28334 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
28335 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
28336 current_tune
->branch_cost (false, false));
28337 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
28338 current_tune
->branch_cost (false, true));
28339 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
28340 current_tune
->branch_cost (true, false));
28341 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
28342 current_tune
->branch_cost (true, true));
28343 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28344 "prefer_ldrd_strd:\t%d\n",
28345 (int) current_tune
->prefer_ldrd_strd
);
28346 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28347 "logical_op_non_short_circuit:\t[%d,%d]\n",
28348 (int) current_tune
->logical_op_non_short_circuit_thumb
,
28349 (int) current_tune
->logical_op_non_short_circuit_arm
);
28350 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28351 "disparage_flag_setting_t16_encodings:\t%d\n",
28352 (int) current_tune
->disparage_flag_setting_t16_encodings
);
28353 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28354 "string_ops_prefer_neon:\t%d\n",
28355 (int) current_tune
->string_ops_prefer_neon
);
28356 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28357 "max_insns_inline_memset:\t%d\n",
28358 current_tune
->max_insns_inline_memset
);
28359 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
28360 current_tune
->fusible_ops
);
28361 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
28362 (int) current_tune
->sched_autopref
);
28365 /* The last set of target options used to emit .arch directives, etc. This
28366 could be a function-local static if it were not required to expose it as a
28367 root to the garbage collector. */
28368 static GTY(()) cl_target_option
*last_asm_targ_options
= NULL
;
28370 /* Print .arch and .arch_extension directives corresponding to the
28371 current architecture configuration. */
28373 arm_print_asm_arch_directives (FILE *stream
, cl_target_option
*targ_options
)
28375 arm_build_target build_target
;
28376 /* If the target options haven't changed since the last time we were called
28377 there is nothing to do. This should be sufficient to suppress the
28378 majority of redundant work. */
28379 if (last_asm_targ_options
== targ_options
)
28382 last_asm_targ_options
= targ_options
;
28384 build_target
.isa
= sbitmap_alloc (isa_num_bits
);
28385 arm_configure_build_target (&build_target
, targ_options
, false);
28387 if (build_target
.core_name
28388 && !bitmap_bit_p (build_target
.isa
, isa_bit_quirk_no_asmcpu
))
28390 const char* truncated_name
28391 = arm_rewrite_selected_cpu (build_target
.core_name
);
28392 asm_fprintf (stream
, "\t.cpu %s\n", truncated_name
);
28395 const arch_option
*arch
28396 = arm_parse_arch_option_name (all_architectures
, "-march",
28397 build_target
.arch_name
);
28398 auto_sbitmap
opt_bits (isa_num_bits
);
28402 if (strcmp (build_target
.arch_name
, "armv7ve") == 0)
28404 /* Keep backward compatability for assemblers which don't support
28405 armv7ve. Fortunately, none of the following extensions are reset
28406 by a .fpu directive. */
28407 asm_fprintf (stream
, "\t.arch armv7-a\n");
28408 asm_fprintf (stream
, "\t.arch_extension virt\n");
28409 asm_fprintf (stream
, "\t.arch_extension idiv\n");
28410 asm_fprintf (stream
, "\t.arch_extension sec\n");
28411 asm_fprintf (stream
, "\t.arch_extension mp\n");
28414 asm_fprintf (stream
, "\t.arch %s\n", build_target
.arch_name
);
28416 /* The .fpu directive will reset any architecture extensions from the
28417 assembler that relate to the fp/vector extensions. So put this out before
28418 any .arch_extension directives. */
28419 const char *fpu_name
= (TARGET_SOFT_FLOAT
28421 : arm_identify_fpu_from_isa (build_target
.isa
));
28422 asm_fprintf (stream
, "\t.fpu %s\n", fpu_name
);
28424 if (!arch
->common
.extensions
)
28427 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
28433 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
28435 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28436 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28437 floating point instructions is disabled. So the following check
28438 restricts the printing of ".arch_extension mve" and
28439 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28440 this special behaviour because the feature bit "mve" and
28441 "mve_float" are not part of "fpu bits", so they are not cleared
28442 when -mfloat-abi=soft (i.e nofp) but the marco TARGET_HAVE_MVE and
28443 TARGET_HAVE_MVE_FLOAT are disabled. */
28444 if ((bitmap_bit_p (opt_bits
, isa_bit_mve
) && !TARGET_HAVE_MVE
)
28445 || (bitmap_bit_p (opt_bits
, isa_bit_mve_float
)
28446 && !TARGET_HAVE_MVE_FLOAT
))
28449 /* If every feature bit of this option is set in the target ISA
28450 specification, print out the option name. However, don't print
28451 anything if all the bits are part of the FPU specification. */
28452 if (bitmap_subset_p (opt_bits
, build_target
.isa
)
28453 && !bitmap_subset_p (opt_bits
, isa_all_fpubits_internal
))
28454 asm_fprintf (stream
, "\t.arch_extension %s\n", opt
->name
);
28460 arm_file_start (void)
28464 arm_print_asm_arch_directives
28465 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28469 /* If we have a named cpu, but we the assembler does not support that
28470 name via .cpu, put out a cpu name attribute; but don't do this if the
28471 name starts with the fictitious prefix, 'generic'. */
28472 if (arm_active_target
.core_name
28473 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
)
28474 && !startswith (arm_active_target
.core_name
, "generic"))
28476 const char* truncated_name
28477 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
28478 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
))
28479 asm_fprintf (asm_out_file
, "\t.eabi_attribute 5, \"%s\"\n",
28483 if (print_tune_info
)
28484 arm_print_tune_info ();
28486 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
28487 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28489 if (TARGET_HARD_FLOAT_ABI
)
28490 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28492 /* Some of these attributes only apply when the corresponding features
28493 are used. However we don't have any easy way of figuring this out.
28494 Conservatively record the setting that would have been used. */
28496 if (flag_rounding_math
)
28497 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28499 if (!flag_unsafe_math_optimizations
)
28501 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28502 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28504 if (flag_signaling_nans
)
28505 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28507 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28508 flag_finite_math_only
? 1 : 3);
28510 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28511 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28512 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28513 flag_short_enums
? 1 : 2);
28515 /* Tag_ABI_optimization_goals. */
28518 else if (optimize
>= 2)
28524 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28526 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28529 if (arm_fp16_format
)
28530 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28531 (int) arm_fp16_format
);
28533 if (arm_lang_output_object_attributes_hook
)
28534 arm_lang_output_object_attributes_hook();
28537 default_file_start ();
28541 arm_file_end (void)
28545 /* Just in case the last function output in the assembler had non-default
28546 architecture directives, we force the assembler state back to the default
28547 set, so that any 'calculated' build attributes are based on the default
28548 options rather than the special options for that function. */
28549 arm_print_asm_arch_directives
28550 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28552 if (NEED_INDICATE_EXEC_STACK
)
28553 /* Add .note.GNU-stack. */
28554 file_end_indicate_exec_stack ();
28556 if (! thumb_call_reg_needed
)
28559 switch_to_section (text_section
);
28560 asm_fprintf (asm_out_file
, "\t.code 16\n");
28561 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28563 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28565 rtx label
= thumb_call_via_label
[regno
];
28569 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28570 CODE_LABEL_NUMBER (label
));
28571 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28577 /* Symbols in the text segment can be accessed without indirecting via the
28578 constant pool; it may take an extra binary operation, but this is still
28579 faster than indirecting via memory. Don't do this when not optimizing,
28580 since we won't be calculating al of the offsets necessary to do this
28584 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28586 if (optimize
> 0 && TREE_CONSTANT (decl
))
28587 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28589 default_encode_section_info (decl
, rtl
, first
);
28591 #endif /* !ARM_PE */
28594 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28596 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28597 && !strcmp (prefix
, "L"))
28599 arm_ccfsm_state
= 0;
28600 arm_target_insn
= NULL
;
28602 default_internal_label (stream
, prefix
, labelno
);
28605 /* Define classes to generate code as RTL or output asm to a file.
28606 Using templates then allows to use the same code to output code
28607 sequences in the two formats. */
28608 class thumb1_const_rtl
28611 thumb1_const_rtl (rtx dst
) : dst (dst
) {}
28613 void mov (HOST_WIDE_INT val
)
28615 emit_set_insn (dst
, GEN_INT (val
));
28618 void add (HOST_WIDE_INT val
)
28620 emit_set_insn (dst
, gen_rtx_PLUS (SImode
, dst
, GEN_INT (val
)));
28623 void ashift (HOST_WIDE_INT shift
)
28625 emit_set_insn (dst
, gen_rtx_ASHIFT (SImode
, dst
, GEN_INT (shift
)));
28630 emit_set_insn (dst
, gen_rtx_NEG (SImode
, dst
));
28637 class thumb1_const_print
28640 thumb1_const_print (FILE *f
, int regno
)
28643 dst_regname
= reg_names
[regno
];
28646 void mov (HOST_WIDE_INT val
)
28648 asm_fprintf (t_file
, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28652 void add (HOST_WIDE_INT val
)
28654 asm_fprintf (t_file
, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28658 void ashift (HOST_WIDE_INT shift
)
28660 asm_fprintf (t_file
, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28661 dst_regname
, shift
);
28666 asm_fprintf (t_file
, "\trsbs\t%s, #0\n", dst_regname
);
28671 const char *dst_regname
;
28674 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28675 Avoid generating useless code when one of the bytes is zero. */
28678 thumb1_gen_const_int_1 (T dst
, HOST_WIDE_INT op1
)
28680 bool mov_done_p
= false;
28681 unsigned HOST_WIDE_INT val
= op1
;
28685 gcc_assert (op1
== trunc_int_for_mode (op1
, SImode
));
28693 /* For negative numbers with the first nine bits set, build the
28694 opposite of OP1, then negate it, it's generally shorter and not
28696 if ((val
& 0xFF800000) == 0xFF800000)
28698 thumb1_gen_const_int_1 (dst
, -op1
);
28703 /* In the general case, we need 7 instructions to build
28704 a 32 bits constant (1 movs, 3 lsls, 3 adds). We can
28705 do better if VAL is small enough, or
28706 right-shiftable by a suitable amount. If the
28707 right-shift enables to encode at least one less byte,
28708 it's worth it: we save a adds and a lsls at the
28709 expense of a final lsls. */
28710 int final_shift
= number_of_first_bit_set (val
);
28712 int leading_zeroes
= clz_hwi (val
);
28713 int number_of_bytes_needed
28714 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
)
28715 / BITS_PER_UNIT
) + 1;
28716 int number_of_bytes_needed2
28717 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
- final_shift
)
28718 / BITS_PER_UNIT
) + 1;
28720 if (number_of_bytes_needed2
< number_of_bytes_needed
)
28721 val
>>= final_shift
;
28725 /* If we are in a very small range, we can use either a single movs
28731 unsigned HOST_WIDE_INT high
= val
- 255;
28739 if (final_shift
> 0)
28740 dst
.ashift (final_shift
);
28744 /* General case, emit upper 3 bytes as needed. */
28745 for (i
= 0; i
< 3; i
++)
28747 unsigned HOST_WIDE_INT byte
= (val
>> (8 * (3 - i
))) & 0xff;
28751 /* We are about to emit new bits, stop accumulating a
28752 shift amount, and left-shift only if we have already
28753 emitted some upper bits. */
28756 dst
.ashift (shift
);
28762 /* Stop accumulating shift amount since we've just
28763 emitted some bits. */
28773 /* Emit lower byte. */
28775 dst
.mov (val
& 0xff);
28778 dst
.ashift (shift
);
28780 dst
.add (val
& 0xff);
28783 if (final_shift
> 0)
28784 dst
.ashift (final_shift
);
28788 /* Proxies for thumb1.md, since the thumb1_const_print and
28789 thumb1_const_rtl classes are not exported. */
28791 thumb1_gen_const_int_rtl (rtx dst
, HOST_WIDE_INT op1
)
28793 thumb1_const_rtl
t (dst
);
28794 thumb1_gen_const_int_1 (t
, op1
);
28798 thumb1_gen_const_int_print (rtx dst
, HOST_WIDE_INT op1
)
28800 thumb1_const_print
t (asm_out_file
, REGNO (dst
));
28801 thumb1_gen_const_int_1 (t
, op1
);
28804 /* Output code to add DELTA to the first argument, and then jump
28805 to FUNCTION. Used for C++ multiple inheritance. */
28808 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
28809 HOST_WIDE_INT
, tree function
)
28811 static int thunk_label
= 0;
28814 int mi_delta
= delta
;
28815 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28817 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28820 mi_delta
= - mi_delta
;
28822 final_start_function (emit_barrier (), file
, 1);
28826 int labelno
= thunk_label
++;
28827 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28828 /* Thunks are entered in arm mode when available. */
28829 if (TARGET_THUMB1_ONLY
)
28831 /* push r3 so we can use it as a temporary. */
28832 /* TODO: Omit this save if r3 is not used. */
28833 fputs ("\tpush {r3}\n", file
);
28835 /* With -mpure-code, we cannot load the address from the
28836 constant pool: we build it explicitly. */
28837 if (target_pure_code
)
28839 fputs ("\tmovs\tr3, #:upper8_15:#", file
);
28840 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28841 fputc ('\n', file
);
28842 fputs ("\tlsls r3, #8\n", file
);
28843 fputs ("\tadds\tr3, #:upper0_7:#", file
);
28844 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28845 fputc ('\n', file
);
28846 fputs ("\tlsls r3, #8\n", file
);
28847 fputs ("\tadds\tr3, #:lower8_15:#", file
);
28848 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28849 fputc ('\n', file
);
28850 fputs ("\tlsls r3, #8\n", file
);
28851 fputs ("\tadds\tr3, #:lower0_7:#", file
);
28852 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28853 fputc ('\n', file
);
28856 fputs ("\tldr\tr3, ", file
);
28860 fputs ("\tldr\tr12, ", file
);
28863 if (!target_pure_code
)
28865 assemble_name (file
, label
);
28866 fputc ('\n', file
);
28871 /* If we are generating PIC, the ldr instruction below loads
28872 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28873 the address of the add + 8, so we have:
28875 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28878 Note that we have "+ 1" because some versions of GNU ld
28879 don't set the low bit of the result for R_ARM_REL32
28880 relocations against thumb function symbols.
28881 On ARMv6M this is +4, not +8. */
28882 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28883 assemble_name (file
, labelpc
);
28884 fputs (":\n", file
);
28885 if (TARGET_THUMB1_ONLY
)
28887 /* This is 2 insns after the start of the thunk, so we know it
28888 is 4-byte aligned. */
28889 fputs ("\tadd\tr3, pc, r3\n", file
);
28890 fputs ("\tmov r12, r3\n", file
);
28893 fputs ("\tadd\tr12, pc, r12\n", file
);
28895 else if (TARGET_THUMB1_ONLY
)
28896 fputs ("\tmov r12, r3\n", file
);
28898 if (TARGET_THUMB1_ONLY
)
28900 if (mi_delta
> 255)
28902 /* With -mpure-code, we cannot load MI_DELTA from the
28903 constant pool: we build it explicitly. */
28904 if (target_pure_code
)
28906 thumb1_const_print
r3 (file
, 3);
28907 thumb1_gen_const_int_1 (r3
, mi_delta
);
28911 fputs ("\tldr\tr3, ", file
);
28912 assemble_name (file
, label
);
28913 fputs ("+4\n", file
);
28915 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
28916 mi_op
, this_regno
, this_regno
);
28918 else if (mi_delta
!= 0)
28920 /* Thumb1 unified syntax requires s suffix in instruction name when
28921 one of the operands is immediate. */
28922 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
28923 mi_op
, this_regno
, this_regno
,
28929 /* TODO: Use movw/movt for large constants when available. */
28930 while (mi_delta
!= 0)
28932 if ((mi_delta
& (3 << shift
)) == 0)
28936 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28937 mi_op
, this_regno
, this_regno
,
28938 mi_delta
& (0xff << shift
));
28939 mi_delta
&= ~(0xff << shift
);
28946 if (TARGET_THUMB1_ONLY
)
28947 fputs ("\tpop\t{r3}\n", file
);
28949 fprintf (file
, "\tbx\tr12\n");
28951 /* With -mpure-code, we don't need to emit literals for the
28952 function address and delta since we emitted code to build
28954 if (!target_pure_code
)
28956 ASM_OUTPUT_ALIGN (file
, 2);
28957 assemble_name (file
, label
);
28958 fputs (":\n", file
);
28961 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28962 rtx tem
= XEXP (DECL_RTL (function
), 0);
28963 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28964 pipeline offset is four rather than eight. Adjust the offset
28966 tem
= plus_constant (GET_MODE (tem
), tem
,
28967 TARGET_THUMB1_ONLY
? -3 : -7);
28968 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28970 gen_rtx_SYMBOL_REF (Pmode
,
28971 ggc_strdup (labelpc
)));
28972 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28975 /* Output ".word .LTHUNKn". */
28976 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28978 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28979 assemble_integer (GEN_INT (mi_delta
), 4, BITS_PER_WORD
, 1);
28984 fputs ("\tb\t", file
);
28985 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28986 if (NEED_PLT_RELOC
)
28987 fputs ("(PLT)", file
);
28988 fputc ('\n', file
);
28991 final_end_function ();
28994 /* MI thunk handling for TARGET_32BIT. */
28997 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
28998 HOST_WIDE_INT vcall_offset
, tree function
)
29000 const bool long_call_p
= arm_is_long_call_p (function
);
29002 /* On ARM, this_regno is R0 or R1 depending on
29003 whether the function returns an aggregate or not.
29005 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
29007 ? R1_REGNUM
: R0_REGNUM
);
29009 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
29010 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
29011 reload_completed
= 1;
29012 emit_note (NOTE_INSN_PROLOGUE_END
);
29014 /* Add DELTA to THIS_RTX. */
29016 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
29017 delta
, this_rtx
, this_rtx
, false);
29019 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29020 if (vcall_offset
!= 0)
29022 /* Load *THIS_RTX. */
29023 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
29024 /* Compute *THIS_RTX + VCALL_OFFSET. */
29025 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
29027 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29028 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
29029 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
29032 /* Generate a tail call to the target function. */
29033 if (!TREE_USED (function
))
29035 assemble_external (function
);
29036 TREE_USED (function
) = 1;
29038 rtx funexp
= XEXP (DECL_RTL (function
), 0);
29041 emit_move_insn (temp
, funexp
);
29044 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
29045 rtx_insn
*insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
29046 SIBLING_CALL_P (insn
) = 1;
29049 /* Indirect calls require a bit of fixup in PIC mode. */
29052 split_all_insns_noflow ();
29056 insn
= get_insns ();
29057 shorten_branches (insn
);
29058 final_start_function (insn
, file
, 1);
29059 final (insn
, file
, 1);
29060 final_end_function ();
29062 /* Stop pretending this is a post-reload pass. */
29063 reload_completed
= 0;
29066 /* Output code to add DELTA to the first argument, and then jump
29067 to FUNCTION. Used for C++ multiple inheritance. */
29070 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
29071 HOST_WIDE_INT vcall_offset
, tree function
)
29073 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
29075 assemble_start_function (thunk
, fnname
);
29077 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29079 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29080 assemble_end_function (thunk
, fnname
);
29084 arm_emit_vector_const (FILE *file
, rtx x
)
29087 const char * pattern
;
29089 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
29091 switch (GET_MODE (x
))
29093 case E_V2SImode
: pattern
= "%08x"; break;
29094 case E_V4HImode
: pattern
= "%04x"; break;
29095 case E_V8QImode
: pattern
= "%02x"; break;
29096 default: gcc_unreachable ();
29099 fprintf (file
, "0x");
29100 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
29104 element
= CONST_VECTOR_ELT (x
, i
);
29105 fprintf (file
, pattern
, INTVAL (element
));
29111 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29112 HFmode constant pool entries are actually loaded with ldr. */
29114 arm_emit_fp16_const (rtx c
)
29118 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
29119 if (WORDS_BIG_ENDIAN
)
29120 assemble_zeros (2);
29121 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
29122 if (!WORDS_BIG_ENDIAN
)
29123 assemble_zeros (2);
29127 arm_output_load_gr (rtx
*operands
)
29134 if (!MEM_P (operands
[1])
29135 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
29136 || !REG_P (reg
= XEXP (sum
, 0))
29137 || !CONST_INT_P (offset
= XEXP (sum
, 1))
29138 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
29139 return "wldrw%?\t%0, %1";
29141 /* Fix up an out-of-range load of a GR register. */
29142 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
29143 wcgr
= operands
[0];
29145 output_asm_insn ("ldr%?\t%0, %1", operands
);
29147 operands
[0] = wcgr
;
29149 output_asm_insn ("tmcr%?\t%0, %1", operands
);
29150 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
29155 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29157 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29158 named arg and all anonymous args onto the stack.
29159 XXX I know the prologue shouldn't be pushing registers, but it is faster
29163 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
29164 const function_arg_info
&arg
,
29166 int second_time ATTRIBUTE_UNUSED
)
29168 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
29171 cfun
->machine
->uses_anonymous_args
= 1;
29172 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
29174 nregs
= pcum
->aapcs_ncrn
;
29175 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl
))
29178 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
29179 if (res
< 0 && warn_psabi
)
29180 inform (input_location
, "parameter passing for argument of "
29181 "type %qT changed in GCC 7.1", arg
.type
);
29185 if (res
> 1 && warn_psabi
)
29186 inform (input_location
,
29187 "parameter passing for argument of type "
29188 "%qT changed in GCC 9.1", arg
.type
);
29193 nregs
= pcum
->nregs
;
29195 if (nregs
< NUM_ARG_REGS
)
29196 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
29199 /* We can't rely on the caller doing the proper promotion when
29200 using APCS or ATPCS. */
29203 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
29205 return !TARGET_AAPCS_BASED
;
29208 static machine_mode
29209 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
29211 int *punsignedp ATTRIBUTE_UNUSED
,
29212 const_tree fntype ATTRIBUTE_UNUSED
,
29213 int for_return ATTRIBUTE_UNUSED
)
29215 if (GET_MODE_CLASS (mode
) == MODE_INT
29216 && GET_MODE_SIZE (mode
) < 4)
29224 arm_default_short_enums (void)
29226 return ARM_DEFAULT_SHORT_ENUMS
;
29230 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29233 arm_align_anon_bitfield (void)
29235 return TARGET_AAPCS_BASED
;
29239 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29242 arm_cxx_guard_type (void)
29244 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}
/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}
/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}
/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}
/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}
static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}
static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}
/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
      || mode == V8BFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  if (TARGET_HAVE_MVE
      && (mode == V2DImode || mode == V4SImode || mode == V8HImode
	  || mode == V16QImode
	  || mode == V16BImode || mode == V8BImode || mode == V4BImode))
    return true;

  if (TARGET_HAVE_MVE_FLOAT
      && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
      && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_HFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  if (TARGET_HAVE_MVE)
    switch (mode)
      {
      case E_QImode:
	return V16QImode;
      case E_HImode:
	return V8HImode;
      case E_SImode:
	return V4SImode;

      default:;
      }

  if (TARGET_HAVE_MVE_FLOAT)
    switch (mode)
      {
      case E_HFmode:
	return V8HFmode;
      case E_SFmode:
	return V4SFmode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return default_class_likely_spilled_p (rclass);
}
/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
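/* For example (illustrative arithmetic, not from the original source): with
   mask 255, an SImode shift by 257 behaves like a shift by 257 & 255 = 1,
   whereas the zero mask for DImode promises nothing about out-of-range
   counts.  */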
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_debugger_regno (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
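/* Illustrative mappings implied by the code above (not from the original
   source): s5 (regno FIRST_VFP_REGNUM + 5) maps to DWARF register
   64 + 5 = 69 in the legacy S-register range, while d16 (regno
   FIRST_VFP_REGNUM + 32, which is not valid as a single) maps to
   256 + 32 / 2 = 272 in the D-register range.  */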
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (out_file, "%r", reg);

      if (flag_checking)
	{
	  /* Check that the addresses are consecutive.  */
	  e = XEXP (SET_DEST (e), 0);
	  if (GET_CODE (e) == PLUS)
	    gcc_assert (REG_P (XEXP (e, 0))
			&& REGNO (XEXP (e, 0)) == SP_REGNUM
			&& CONST_INT_P (XEXP (e, 1))
			&& offset == INTVAL (XEXP (e, 1)));
	  else
	    gcc_assert (i == 1
			&& REG_P (e)
			&& REGNO (e) == SP_REGNUM);
	  offset += reg_size;
	}
    }
  fprintf (out_file, "}\n");
  if (padfirst)
    fprintf (out_file, "\t.pad #%d\n", padfirst);
}
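/* Illustrative example (not from the original source): a prologue
   "push {r4, r5, lr}" is a PARALLEL whose first SET subtracts 12 from sp
   and whose remaining SETs store r4, r5 and lr; the loop above then prints
       .save {r4, r5, lr}
   and the offset check holds with padfirst == padlast == 0.  */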
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
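/* Illustrative examples of the annotations emitted above (assumed RTL
   shapes, not from the original source):
     (set (mem (pre_dec sp)) (reg r4))     ->  .save {r4}
     (set sp (plus sp (const_int -16)))    ->  .pad #16
     (set fp (plus sp (const_int 8)))      ->  .setfp r11, sp, #8
   where fp is r11 for ARM-state APCS frames.  */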
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	  /* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	     to get correct dwarf information for shrink-wrap.  We should not
	     emit unwind information for it because these are used either for
	     pretend arguments or notes to adjust sp and restore registers from
	     stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
/* Output unwind directives for the start/end of a function.  */
void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_GD32_FDPIC:
      fputs ("(tlsgd_fdpic)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDM32_FDPIC:
      fputs ("(tlsldm_fdpic)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_IE32_FDPIC:
      fputs ("(gottpoff_fdpic)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op (operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT (val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case E_V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case E_V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case E_V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
	break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}
/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
static int
arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
  if (DEBUG_INSN_P (insn))
    return more;

  rtx_code code = GET_CODE (PATTERN (insn));
  if (code == USE || code == CLOBBER)
    return more;

  if (get_attr_type (insn) == TYPE_NO_INSN)
    return more;

  return more - 1;
}
/* Return how many instructions should scheduler lookahead to choose the
   best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision floating point types.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    {
      if (TYPE_MAIN_VARIANT (type) == float16_type_node)
	return NULL;
      if (TYPE_MODE (type) == BFmode)
	return "u6__bf16";
      else
	return "Dh";
    }

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
   All modes except THUMB1 have conditional execution.
   If we have conditional arithmetic, return false before reload to
   enable some ifcvt transformations.  */
static bool
arm_have_conditional_execution (void)
{
  bool has_cond_exec, enable_ifcvt_trans;

  /* Only THUMB1 cannot support conditional execution.  */
  has_cond_exec = !TARGET_THUMB1;

  /* Enable ifcvt transformations if we have conditional arithmetic, but only
     before reload.  */
  enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;

  return has_cond_exec && !enable_ifcvt_trans;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}
static unsigned int
arm_autovectorize_vector_modes (vector_modes *modes, bool)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V8QImode);
    }
  return 0;
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_VFP_BASE)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
      if (TARGET_HAVE_MVE)
	fixed_regs[VPR_REGNUM] = 0;
    }

  if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  /* The Q and GE bits are only accessed via special ACLE patterns.  */
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
     list is 8-bit.  Normally this means all registers in the list must be
     LO_REGS, that is (R0-R7).  If any HI_REGS used, then we must use 32-bit
     encodings.  There is one exception for PUSH that LR in HI_REGS can be used
     with 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
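/* Worked examples for the Thumb-2 path (illustrative, not from the original
   source): "push {r0-r7}" and "push {r0, lr}" both keep the 16-bit encoding
   (length 2), while "push {r0, r8}" needs the 32-bit encoding (length 4)
   because r8 is in HI_REGS and is not LR.  */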
/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
   rtx, RETURN_PC is true if OPERANDS contains return insn.  WRITE_BACK_P is
   true if OPERANDS contains insn which explicit updates base register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is SP
     and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
/* Same as above, but operands are a register/memory pair in SImode.
   Assumes operands has the base register in position 0 and memory in position
   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  int count;
  rtx ops[2];
  int regnum, memnum;
  if (load)
    regnum = 0, memnum = 1;
  else
    regnum = 1, memnum = 0;
  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
  ops[memnum] = adjust_address (operands[2], DImode, 0);
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ((value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}
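/* Worked example (illustrative, not from the original source): for the
   constant 0.125 the exact inverse is 8.0, which truncates exactly to the
   integer 8; 8 is a power of two, so the function returns
   exact_log2 (8) = 3, i.e. three fractional bits for the fixed-point
   conversion patterns.  */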
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
30839 arm_pre_atomic_barrier (enum memmodel model
)
30841 if (need_atomic_barrier_p (model
, true))
30842 emit_insn (gen_memory_barrier ());
30846 arm_post_atomic_barrier (enum memmodel model
)
30848 if (need_atomic_barrier_p (model
, false))
30849 emit_insn (gen_memory_barrier ());
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    cmp_mode = E_SImode;
  else
    cmp_mode = CC_Zmode;

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
					    oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (ie. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s_rtx = operands[6];
  mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	{
	  rtx src = rval;
	  if (!satisfies_constraint_L (oldval))
	    {
	      gcc_assert (satisfies_constraint_J (oldval));

	      /* For such immediates, ADDS needs the source and destination regs
		 to be the same.

		 Normally this would be handled by RA, but this is all happening
		 after RA.  */
	      emit_move_insn (neg_bval, rval);
	      src = neg_bval;
	    }

	  emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
						       label2, cond));
	}
      else
	{
	  emit_move_insn (neg_bval, const1_rtx);
	  emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
	}
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
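/* For reference, the kind of sequence this split produces for a strong
   SImode compare-and-swap on an ARMv7 32-bit target looks roughly like
   (illustrative only, register numbers assumed):
       1:  ldrex   r0, [r1]        @ rval = *mem
	   cmp     r0, r2          @ rval == oldval?
	   bne     2f              @ fail: Z clear
	   strex   r3, r4, [r1]    @ try *mem = newval
	   cmp     r3, #0
	   bne     1b              @ monitor lost: retry
       2:
   with barriers or acquire/release variants substituted as selected
   above.  */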
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = gen_int_mode (-INTVAL (value), wmode);
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
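/* For reference, an SImode atomic add expands along the lines of
   (illustrative only, register numbers assumed):
       1:  ldrex   r0, [r2]        @ old_out = *mem
	   add     r1, r0, r3      @ new_out = old_out + value
	   strex   ip, r1, [r2]    @ ip = 0 iff the store succeeded
	   cmp     ip, #0
	   bne     1b
   bracketed by whichever barriers were chosen above.  */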
/* Return the mode for the MVE vector of predicates corresponding to MODE.  */
opt_machine_mode
arm_mode_to_pred_mode (machine_mode mode)
{
  switch (GET_MODE_NUNITS (mode))
    {
    case 16: return V16BImode;
    case 8: return V8BImode;
    case 4: return V4BImode;
    }
  return opt_machine_mode ();
}
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
   If CAN_INVERT, store either the result or its inverse in TARGET
   and return true if TARGET contains the inverse.  If !CAN_INVERT,
   always store the result in TARGET, never its inverse.

   Note that the handling of floating-point comparisons is not
   IEEE compliant.  */

bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
			   bool can_invert)
{
  machine_mode cmp_result_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (op0);

  bool inverted;

  /* MVE supports more comparisons than Neon.  */
  if (TARGET_HAVE_MVE)
    inverted = false;
  else
    switch (code)
      {
      /* For these we need to compute the inverse of the requested
	 comparison.  */
      case UNORDERED:
      case UNLT:
      case UNLE:
      case UNGT:
      case UNGE:
      case UNEQ:
      case NE:
	code = reverse_condition_maybe_unordered (code);
	if (!can_invert)
	  {
	    /* Recursively emit the inverted comparison into a temporary
	       and then store its inverse in TARGET.  This avoids reusing
	       TARGET (which for integer NE could be one of the inputs).  */
	    rtx tmp = gen_reg_rtx (cmp_result_mode);
	    if (arm_expand_vector_compare (tmp, code, op0, op1, true))
	      gcc_unreachable ();
	    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
	    return false;
	  }
	inverted = true;
	break;

      default:
	inverted = false;
	break;
      }

  switch (code)
    {
    /* These are natively supported by Neon for zero comparisons, but otherwise
       require the operands to be swapped.  For MVE, we can only compare
       registers.  */
    case LE:
    case LT:
      if (!TARGET_HAVE_MVE)
	if (op1 != CONST0_RTX (cmp_mode))
	  {
	    code = swap_condition (code);
	    std::swap (op0, op1);
	  }
      /* Fall through.  */

    /* These are natively supported by Neon for both register and zero
       operands.  MVE supports registers only.  */
    case EQ:
    case GE:
    case GT:
    case NE:
      if (TARGET_HAVE_MVE)
	{
	  switch (GET_MODE_CLASS (cmp_mode))
	    {
	    case MODE_VECTOR_INT:
	      emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
					op0, force_reg (cmp_mode, op1)));
	      break;
	    case MODE_VECTOR_FLOAT:
	      if (TARGET_HAVE_MVE_FLOAT)
		emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
					    op0, force_reg (cmp_mode, op1)));
	      else
		gcc_unreachable ();
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
      return inverted;

    /* These are natively supported for register operands only.
       Comparisons with zero aren't useful and should be folded
       or canonicalized by target-independent code.  */
    case GEU:
    case GTU:
      if (TARGET_HAVE_MVE)
	emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
				  op0, force_reg (cmp_mode, op1)));
      else
	emit_insn (gen_neon_vc (code, cmp_mode, target,
				op0, force_reg (cmp_mode, op1)));
      return inverted;

    /* These require the operands to be swapped and likewise do not
       support comparisons with zero.  */
    case LEU:
    case LTU:
      if (TARGET_HAVE_MVE)
	emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
				  force_reg (cmp_mode, op1), op0));
      else
	emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
				target, force_reg (cmp_mode, op1), op0));
      return inverted;

    /* These need a combination of two comparisons.  */
    case LTGT:
    case ORDERED:
      {
	/* Operands are LTGT iff (a > b || a < b).
	   Operands are ORDERED iff (a > b || a <= b).  */
	rtx gt_res = gen_reg_rtx (cmp_result_mode);
	rtx alt_res = gen_reg_rtx (cmp_result_mode);
	rtx_code alt_code = (code == LTGT ? LT : LE);
	if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
	    || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
	  gcc_unreachable ();
	emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
						     gt_res, alt_res)));
	return inverted;
      }

    default:
      gcc_unreachable ();
    }
}
/* Expand a vcond or vcondu pattern with operands OPERANDS.
   CMP_RESULT_MODE is the mode of the comparison result.  */

void
arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
  /* When expanding for MVE, we do not want to emit a (useless) vpsel in
     arm_expand_vector_compare, and another one here.  */
  rtx mask;

  if (TARGET_HAVE_MVE)
    mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
  else
    mask = gen_reg_rtx (cmp_result_mode);

  bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
					     operands[4], operands[5], true);
  if (inverted)
    std::swap (operands[1], operands[2]);
  if (TARGET_NEON)
    emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
			      mask, operands[1], operands[2]));
  else
    {
      machine_mode cmp_mode = GET_MODE (operands[0]);

      switch (GET_MODE_CLASS (cmp_mode))
	{
	case MODE_VECTOR_INT:
	  emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
				     operands[1], operands[2], mask));
	  break;
	case MODE_VECTOR_FLOAT:
	  if (TARGET_HAVE_MVE_FLOAT)
	    emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
					 operands[1], operands[2], mask));
	  else
	    gcc_unreachable ();
	  break;
	default:
	  gcc_unreachable ();
	}
    }
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
, rtx op0
, rtx op1
, rtx sel
)
31518 machine_mode vmode
= GET_MODE (target
);
31519 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
31520 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31523 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31524 numbering of elements for big-endian, we must reverse the order. */
31525 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
31527 /* The VTBL instruction does not use a modulo index, so we must take care
31528 of that ourselves. */
31529 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31530 mask
= gen_const_vec_duplicate (vmode
, mask
);
31531 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
31533 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
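/* Illustrative sketch (not compiled into GCC; all names here are
   hypothetical): a scalar model of why the selector must be masked.
   NEON VTBL yields 0 for an out-of-range index, whereas VEC_PERM_EXPR
   requires modulo semantics; the AND emitted above folds every index
   into range before the table lookup.  */
#if 0
static unsigned char
model_vtbl_lane (const unsigned char *table, unsigned int table_len,
		 unsigned int index, unsigned int nelt, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  index &= mask;				/* What arm_expand_vec_perm emits.  */
  return index < table_len ? table[index] : 0;	/* VTBL's own rule.  */
}
#endif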
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}
/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
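/* Illustrative sketch (not compiled into GCC; hypothetical names): the
   lane remapping above restated over plain ints.  For a 16-byte vector
   of 4 elements on big-endian, GCC lane 0 maps to (4-1-0) ^ 2 = 1 and
   GCC lane 1 maps to (4-1-1) ^ 2 = 0.  */
#if 0
static int
sketch_endian_lane_map (int nelems, int mode_size_bytes, int lane)
{
  /* Reverse lane order, then swap the two D-register halves of a
     Q register so the numbering matches the ABI.  */
  lane = nelems - 1 - lane;
  if (mode_size_bytes == 16)
    lane ^= nelems / 2;
  return lane;
}
#endif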
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen) (machine_mode, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	case E_V8HFmode:
	case E_V4HFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev16;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V4SImode:
	case E_V2SImode:
	case E_V4SFmode:
	case E_V2SFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->vmode, d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);

  if (d->vmode == E_DImode)
    return false;

  emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
  return true;
}
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (arm_evpc_neon_vext (d))
    return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      d->perm.rotate_inputs (1);
      std::swap (d->op0, d->op1);
    }

  if (arm_evpc_neon_vuzp (d))
    return true;
  if (arm_evpc_neon_vzip (d))
    return true;
  if (arm_evpc_neon_vrev (d))
    return true;
  if (arm_evpc_neon_vtrn (d))
    return true;
  return arm_evpc_neon_vtbl (d);
}
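/* Illustrative sketch (not compiled into GCC; hypothetical names): the
   operand-swap canonicalization above, on a raw index array.  For two
   inputs with a power-of-two lane count, rotating the inputs is the
   same as flipping the "which input" bit of every index.  */
#if 0
static void
sketch_swap_perm_inputs (unsigned int *perm, unsigned int nelt)
{
  if (perm[0] >= nelt)			/* Sequence starts in the second vector.  */
    for (unsigned int i = 0; i < nelt; i++)
      perm[i] ^= nelt;			/* Flip which input each lane selects.  */
}
#endif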
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
			      rtx target, rtx op0, rtx op1,
			      const vec_perm_indices &sel)
{
  if (vmode != op_mode)
    return false;

  struct expand_vec_perm_d d;
  int i, nelt, which;

  if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
    return false;

  d.target = target;
  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
	op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = !target;

  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);

  if (!d.testing_p)
    return arm_expand_vec_perm_const_1 (&d);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  bool ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || SUBREG_P (out))
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || SUBREG_P (in))
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || SUBREG_P (amount))
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined"
	 behavior, in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 ASHIFT (x << y)
	 out_down = in_down << amount;
	 out_down = (in_up << (amount - 32)) | out_down;
	 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	 out_up = in_up << amount;

	 ASHIFTRT (x >> y)
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount < 32)
	   out_down = ((signed)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 LSHIFTRT (x >> y)
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount < 32)
	   out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
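/* Illustrative sketch (not compiled into GCC; hypothetical names): the
   shift-by-constant decomposition above, written as plain C on a 64-bit
   value split into two 32-bit halves, left shift only.  E.g. a shift by
   40 becomes "high = low << 8; low = 0".  */
#if 0
static void
sketch_shl64_by_const (unsigned int *hi, unsigned int *lo, unsigned int amount)
{
  if (amount >= 64)
    *hi = *lo = 0;				/* Out-of-range: all bits gone.  */
  else if (amount >= 32)
    {
      *hi = *lo << (amount - 32);		/* "Shifts by a constant greater than 31."  */
      *lo = 0;
    }
  else if (amount > 0)
    {
      *hi = (*hi << amount) | (*lo >> (32 - amount));
      *lo <<= amount;				/* "Shifts by a constant less than 32."  */
    }
}
#endif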
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.

   In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
   unsigned range of 0 <= A < 256 as described in the AAELF32
   relocation handling documentation: REL-type relocations are encoded
   as unsigned in this case.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (target_word_relocations)
    return false;

  if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	{
	  if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
	    return IN_RANGE (INTVAL (xop1), 0, 0xff);
	  else
	    return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
	}
    }

  return false;
}
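/* Illustrative sketch (not compiled into GCC; hypothetical names): the
   addend range checks above as plain C.  The bounds come straight from
   the comment: a signed 16-bit field for MOVW/MOVT REL relocations, an
   unsigned 8-bit field for the Thumb-1 upper/lower relocations.  */
#if 0
static int
sketch_addend_in_range (long addend, int thumb1_no_movt)
{
  if (thumb1_no_movt)
    return addend >= 0 && addend <= 0xff;	/* 0 <= A < 256.  */
  return addend >= -0x8000 && addend <= 0x7fff;	/* -32768 <= A < 32768.  */
}
#endif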
/* Return true if COMPARISON is a valid comparison operation, forcing
   the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      /* gen_compare_reg() will sort out any invalid operands.  */
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */

static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
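/* Illustrative sketch (not compiled into GCC; hypothetical names): the
   instruction count for the word-store path above.  Setting 11 bytes
   costs the constant load, plus 11 >> 2 = 2 word stores, plus
   leftover[11 & 3] = 2 (one strh and one strb); that final STRH/STRB
   pair can merge into a single unaligned STR, saving one insn.  */
#if 0
static int
sketch_word_path_insns (int const_cost, unsigned int length,
			int have_unaligned_access)
{
  static const int leftover[4] = {0, 1, 1, 2};
  int num = const_cost + (length >> 2) + leftover[length & 3];
  if (have_unaligned_access && length > 3 && (length & 3) == 3)
    num--;					/* STRH+STRB merged into STR.  */
  return num;
}
#endif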
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;
  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storev8qi (mem, reg));
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_insn (gen_unaligned_storev8qi (mem, reg));
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	    emit_move_insn (mem, reg);
	  else
	    emit_insn (gen_unaligned_storedi (mem, reg));
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
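/* Illustrative sketch (not compiled into GCC; hypothetical names): the
   byte replication used above.  For value 0xAB the word pattern is
   0xABABABAB, and the strd path doubles it into both halves of a
   64-bit register.  */
#if 0
static unsigned long long
sketch_replicate_byte (unsigned int value, int use_strd)
{
  unsigned long long v = value | (value << 8) | (value << 16) | (value << 24);
  if (use_strd)
    v |= v << 32;		/* Same pattern in both 32-bit halves.  */
  return v;
}
#endif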
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
					(const_int 16)
					(const_int 16))
		     (const_int imm16_1))
     or
     prev (movw) == (set (reg r1)
			  (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
			 (lo_sum (reg r1)
				 (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
	return false;
      default:
	return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
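/* Illustrative sketch (not compiled into GCC; hypothetical name): how
   the offset above is used.  AddressSanitizer places the shadow byte
   for an address at (addr >> 3) + offset, so on 32-bit ARM the shadow
   address is (addr >> 3) + (1 << 29).  */
#if 0
static unsigned int
sketch_asan_shadow_addr (unsigned int addr)
{
  return (addr >> 3) + (1u << 29);	/* One shadow byte per 8 bytes.  */
}
#endif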
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */

static bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);

  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
	= TREE_TARGET_OPTION (caller_tree ? caller_tree
					  : target_option_default_node);

  struct cl_target_option *callee_opts
	= TREE_TARGET_OPTION (callee_tree ? callee_tree
					  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, false);
  arm_configure_build_target (&callee_target, callee_opts, false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g. using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      argstr = NULL;
      if (!strcmp (q, "thumb"))
	{
	  opts->x_target_flags |= MASK_THUMB;
	  if (TARGET_FDPIC && !arm_arch_thumb2)
	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
	}

      else if (!strcmp (q, "arm"))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strcmp (q, "general-regs-only"))
	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;

      else if (startswith (q, "fpu="))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for target attribute or pragma %qs", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else if (startswith (q, "arch="))
	{
	  char *arch = q + 5;
	  const arch_option *arm_selected_arch
	     = arm_parse_arch_option_name (all_architectures, "arch", arch);

	  if (!arm_selected_arch)
	    {
	      error ("invalid architecture for target attribute or pragma %qs",
		     q);
	      return false;
	    }

	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
	}
      else if (q[0] == '+')
	{
	  opts->x_arm_arch_string
	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
	}
      else
	{
	  error ("unknown target attribute or pragma %qs", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts, opts_set);
  arm_configure_build_target (&arm_active_target, &cl_opts, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts, opts_set);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternatively on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
      || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options, func_options_set;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);
  memset (&func_options_set, 0, sizeof (func_options_set));

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options, &func_options_set,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options, &func_options_set,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &func_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options, &func_options_set);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits_internal);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  arm_print_asm_arch_directives (stream, targ_options);

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (REG_P (src) && MEM_P (dest))
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (MEM_P (src) && REG_P (dest))
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
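/* Illustrative sketch (not compiled into GCC; hypothetical name): the
   priority arithmetic above on concrete numbers.  Two loads from the
   same base with offsets 4 and 8 get PRI values differing only in the
   offset bits, so the scheduler keeps them adjacent and in offset
   order.  */
#if 0
static int
sketch_fusion_pri (int max_pri, unsigned int base_regno, int off_val)
{
  int tmp = (max_pri - 1) / 2;
  tmp -= (base_regno & 0xff) << 20;	/* Smaller base register first.  */
  if (off_val >= 0)
    tmp -= off_val & 0xfffff;		/* Smaller offset first.  */
  else
    tmp += (-off_val) & 0xfffff;
  return tmp;	/* This is *PRI; *FUSION_PRI only encodes load-vs-store.  */
}
#endif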
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }  */

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
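/* For example (illustrative): arm_simd_vect_par_cnst_half (V4SImode, true)
   yields (parallel [(const_int 2) (const_int 3)]) on little-endian but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   mask table in the comment above.  */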
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
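/* Sketch of the emitted RTL (illustrative): for CODE == NE and
   CC_MODE == CCmode this is roughly

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF)
                             (pc)))

   with emit_unlikely_jump attaching a very-unlikely branch probability
   note to the resulting jump insn.  */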
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;
      if (flags & SECTION_WRITE)
        *num |= 0x1;
      if (flags & SECTION_CODE)
        *num |= 0x4;
      if (flags & SECTION_MERGE)
        *num |= 0x10;
      if (flags & SECTION_STRINGS)
        *num |= 0x20;
      if (flags & SECTION_TLS)
        *num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;

      return true;
    }

  return false;
}
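/* Illustrative example (using the flag values above): a pure-code
   executable section with SECTION_CODE set and SECTION_DEBUG clear is
   emitted numerically as 0x20000006, i.e. SHF_ARM_PURECODE (0x20000000)
   | SHF_ALLOC (0x2) | SHF_EXECINSTR (0x4).  */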
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like for
         example .text.startup.  We set the pure-code attribute and return the
         same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case, passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option,
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate a call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode, op0, mode, op1, mode);

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
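/* Worked example (illustrative): for MODE == SImode, LIBVAL_MODE is DImode
   and the call expands to e.g. __aeabi_idivmod (op0, op1), which under the
   AAPCS returns the quotient in r0 and the remainder in r1; the two subregs
   above carve those halves out of the DImode return value.  */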
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented, it will abort via gcc_unreachable ().  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
        /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch5t)
          return true;
        break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
        /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch6 || arm_arch5te)
          return true;
        break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
        if (arm_arch6)
          return true;
        break;
      default:
        gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;

  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
      case PLUS:
        {
          /* Or registers with an offset.  */
          if (!REG_P (XEXP (op, 0)))
            return false;

          op = XEXP (op, 1);

          /* The offset must be an immediate though.  */
          if (!CONST_INT_P (op))
            return false;

          range = INTVAL (op);

          /* Within the range of [-1020,1020].  */
          if (!IN_RANGE (range, -1020, 1020))
            return false;

          /* And a multiple of 4.  */
          return (range % 4) == 0;
        }
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
        return REG_P (XEXP (op, 0));
      default:
        gcc_unreachable ();
    }
  return false;
}
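/* Illustrative examples of the rules above:

     (mem (reg r2))                          accepted
     (mem (plus (reg r2) (const_int 8)))     accepted: in range, multiple of 4
     (mem (plus (reg r2) (const_int 1022)))  rejected: outside [-1020, 1020]
     (mem (plus (reg r2) (const_int 6)))     rejected: not a multiple of 4
     (mem (post_inc (reg r2)))               accepted  */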
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
arm_invalid_conversion (const_tree fromtype, const_tree totype)
{
  if (element_mode (fromtype) != element_mode (totype))
    {
      /* Do not allow conversions to/from BFmode scalar types.  */
      if (TYPE_MODE (fromtype) == BFmode)
        return N_("invalid conversion from type %<bfloat16_t%>");
      if (TYPE_MODE (totype) == BFmode)
        return N_("invalid conversion to type %<bfloat16_t%>");
    }

  /* Conversion allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
arm_invalid_unary_op (int op, const_tree type)
{
  /* Reject all single-operand operations on BFmode except for &.  */
  if (element_mode (type) == BFmode && op != ADDR_EXPR)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                       const_tree type2)
{
  /* Reject all 2-operand operations on BFmode.  */
  if (element_mode (type1) == BFmode
      || element_mode (type2) == BFmode)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
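/* Hypothetical source-level view of the three checks above (assuming the
   ACLE bfloat16_t typedef):

     bfloat16_t a, b;
     &a;          // OK: ADDR_EXPR is the one permitted unary operation
     -a;          // error: operation not permitted on bfloat16_t
     a + b;       // error: operation not permitted on bfloat16_t
     (float) a;   // error: invalid conversion from bfloat16_t  */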
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
                           reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
          || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;

  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
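/* For example (illustrative): when tuning for XScale in ARM state, FACTOR
   is 2 and a string constant gets 2 * BITS_PER_WORD alignment; in Thumb
   state FACTOR is 1, and when optimizing for size the incoming alignment
   is returned unchanged.  */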
/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */
static void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}
/* Have we recorded an explicit access to the Q bit of APSR?  */
bool
arm_q_bit_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle qbit",
                             DECL_ATTRIBUTES (cfun->decl));
  return true;
}

/* Have we recorded an explicit access to the GE bits of PSTATE?  */
bool
arm_ge_bits_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle gebits",
                             DECL_ATTRIBUTES (cfun->decl));
  return true;
}
/* Return NULL if insn INSN is valid within a low-overhead loop;
   otherwise return why doloop cannot be applied.  */

static const char *
arm_invalid_within_doloop (const rtx_insn *insn)
{
  if (!TARGET_HAVE_LOB)
    return default_invalid_within_doloop (insn);

  if (CALL_P (insn))
    return "Function call in the loop.";

  if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
    return "LR is used inside loop.";

  return NULL;
}
bool
arm_target_insn_ok_for_lob (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  /* Make sure the basic block of the target insn is a simple latch
     whose single predecessor and successor are the body of the loop
     itself.  Only simple loops with a single basic block as the body are
     supported for 'low-overhead loops', which guarantees that the LE
     target is above LE itself in the generated code.  */

  return single_succ_p (bb)
         && single_pred_p (bb)
         && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
         && contains_no_active_insn_p (bb);
}
#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
      bitmap_clear (isa_all_fpubits_internal);
      bitmap_copy (isa_all_fpubits_internal, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits_internal))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
               " group that are not defined by any FPU.\n"
               "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
    }
}
/* Run all target-specific selftests.  */
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */
/* Implement TARGET_STACK_PROTECT_GUARD.  In the case of a
   global-variable-based guard, use the default; otherwise
   return a null tree.  */
static tree
arm_stack_protect_guard (void)
{
  if (arm_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
   Unlike the arm version, we do NOT implement asm flag outputs.  */

rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
                      vec<machine_mode> & /*input_modes*/,
                      vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
                      HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    if (startswith (constraints[i], "=@cc"))
      {
        sorry ("%<asm%> flags not supported in thumb1 mode");
        break;
      }
  return NULL;
}
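/* Hypothetical example of what is rejected here: in Thumb-1 mode

     int x;
     asm ("" : "=@cceq" (x));

   reaches the sorry () above, since condition-code flag outputs are only
   handled by the arm (32-bit) version of this hook.  */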
/* Generate code to enable conditional branches in functions over 1 MiB.
   Parameters are:
     operands: the operands list of the asm insn (see arm_cond_branch or
       arm_cond_branch_reversed).
     pos_label: an index into the operands array where operands[pos_label]
       is the asm label of the final jump destination.
     dest: a string used to generate the asm label of the intermediate
       destination.
     branch_format: a string denoting the intermediate branch format, e.g.
       "beq", "bne", etc.  */

const char *
arm_gen_far_branch (rtx *operands, int pos_label, const char *dest,
                    const char *branch_format)
{
  rtx_code_label *tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];

  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
                               CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = arm_strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);

  return "";
}
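/* Sketch of the emitted assembly (illustrative; the label name is made up):
   with BRANCH_FORMAT == "beq\t" and the caller having already reversed the
   condition, the output is roughly

        beq     .Lbcond42       @ short branch over the far jump
        b       <final destination>
     .Lbcond42:

   so the conditional branch itself only ever spans two instructions.  */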
/* If the given mode matches, load from memory to LO_REGS
   (i.e. [Rn], Rn <= LO_REGS).  */
enum reg_class
arm_mode_base_reg_class (machine_mode mode)
{
  if (TARGET_HAVE_MVE
      && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
    return LO_REGS;

  return MODE_BASE_REG_REG_CLASS (mode);
}
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
arm_get_mask_mode (machine_mode mode)
{
  if (TARGET_HAVE_MVE)
    return arm_mode_to_pred_mode (mode);

  return default_get_mask_mode (mode);
}