1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
26 #define INCLUDE_STRING
28 #include "coretypes.h"
38 #include "stringpool.h"
45 #include "diagnostic-core.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
52 #include "insn-attr.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
65 #include "target-globals.h"
67 #include "tm-constrs.h"
69 #include "optabs-libfuncs.h"
72 #include "gimple-iterator.h"
74 #include "tree-vectorizer.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
79 /* This file should be included last. */
80 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook set by the language front end (if any) to emit language-specific
   object attributes; NULL when the front end has nothing to emit.  */
void (*arm_lang_output_object_attributes_hook)(void);
93 /* Forward function declarations. */
94 static bool arm_const_not_ok_for_debug_p (rtx
);
95 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
96 static int arm_compute_static_chain_stack_bytes (void);
97 static arm_stack_offsets
*arm_get_frame_offsets (void);
98 static void arm_compute_frame_layout (void);
99 static void arm_add_gc_roots (void);
100 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
101 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
102 static unsigned bit_count (unsigned long);
103 static unsigned bitmap_popcount (const sbitmap
);
104 static int arm_address_register_rtx_p (rtx
, int);
105 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
106 static bool is_called_in_ARM_mode (tree
);
107 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
108 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
109 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
110 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
111 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
112 inline static int thumb1_index_register_rtx_p (rtx
, int);
113 static int thumb_far_jump_used_p (void);
114 static bool thumb_force_lr_save (void);
115 static unsigned arm_size_return_regs (void);
116 static bool arm_assemble_integer (rtx
, unsigned int, int);
117 static void arm_print_operand (FILE *, rtx
, int);
118 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
119 static bool arm_print_operand_punct_valid_p (unsigned char code
);
120 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
121 static arm_cc
get_arm_condition_code (rtx
);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx
*, const char *, const char *,
125 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
126 static struct machine_function
*arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
129 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
130 static Mnode
*add_minipool_forward_ref (Mfix
*);
131 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
132 static Mnode
*add_minipool_backward_ref (Mfix
*);
133 static void assign_minipool_offsets (Mfix
*);
134 static void arm_print_value (FILE *, rtx
);
135 static void dump_minipool (rtx_insn
*);
136 static int arm_barrier_cost (rtx_insn
*);
137 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
138 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
139 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree
);
146 static unsigned long arm_compute_func_type (void);
147 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
148 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
149 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
153 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
154 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree
, const_tree
);
158 static void arm_set_default_type_attributes (tree
);
159 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code
,
162 unsigned HOST_WIDE_INT val
,
163 struct four_ints
*return_sequence
);
164 static int optimal_immediate_sequence_1 (enum rtx_code code
,
165 unsigned HOST_WIDE_INT val
,
166 struct four_ints
*return_sequence
,
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree
, tree
);
170 static machine_mode
arm_promote_function_mode (const_tree
,
173 static bool arm_return_in_memory (const_tree
, const_tree
);
174 static rtx
arm_function_value (const_tree
, const_tree
, bool);
175 static rtx
arm_libcall_value_1 (machine_mode
);
176 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
183 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
184 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn
*, bool);
186 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
187 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
188 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
189 static void emit_constant_insn (rtx cond
, rtx pattern
);
190 static rtx_insn
*emit_set_insn (rtx
, rtx
);
191 static void arm_add_cfa_adjust_cfa_note (rtx
, int, rtx
, rtx
);
192 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx
);
196 static int arm_arg_partial_bytes (cumulative_args_t
,
197 const function_arg_info
&);
198 static rtx
arm_function_arg (cumulative_args_t
, const function_arg_info
&);
199 static void arm_function_arg_advance (cumulative_args_t
,
200 const function_arg_info
&);
201 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
202 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
203 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
205 static rtx
aapcs_libcall_value (machine_mode
);
206 static int aapcs_select_return_coproc (const_tree
, const_tree
);
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
210 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
213 static void arm_encode_section_info (tree
, rtx
, int);
216 static void arm_file_end (void);
217 static void arm_file_start (void);
218 static void arm_insert_attributes (tree
, tree
*);
220 static void arm_setup_incoming_varargs (cumulative_args_t
,
221 const function_arg_info
&, int *, int);
222 static bool arm_pass_by_reference (cumulative_args_t
,
223 const function_arg_info
&);
224 static bool arm_promote_prototypes (const_tree
);
225 static bool arm_default_short_enums (void);
226 static bool arm_align_anon_bitfield (void);
227 static bool arm_return_in_msb (const_tree
);
228 static bool arm_must_pass_in_stack (const function_arg_info
&);
229 static bool arm_return_in_memory (const_tree
, const_tree
);
231 static void arm_unwind_emit (FILE *, rtx_insn
*);
232 static bool arm_output_ttype (rtx
);
233 static void arm_asm_emit_except_personality (rtx
);
235 static void arm_asm_init_sections (void);
236 static rtx
arm_dwarf_register_span (rtx
);
238 static tree
arm_cxx_guard_type (void);
239 static bool arm_cxx_guard_mask_bit (void);
240 static tree
arm_get_cookie_size (tree
);
241 static bool arm_cookie_has_size (void);
242 static bool arm_cxx_cdtor_returns_this (void);
243 static bool arm_cxx_key_method_may_be_inline (void);
244 static void arm_cxx_determine_class_data_visibility (tree
);
245 static bool arm_cxx_class_data_always_comdat (void);
246 static bool arm_cxx_use_aeabi_atexit (void);
247 static void arm_init_libfuncs (void);
248 static tree
arm_build_builtin_va_list (void);
249 static void arm_expand_builtin_va_start (tree
, rtx
);
250 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
251 static void arm_option_override (void);
252 static void arm_option_restore (struct gcc_options
*, struct gcc_options
*,
253 struct cl_target_option
*);
254 static void arm_override_options_after_change (void);
255 static void arm_option_print (FILE *, int, struct cl_target_option
*);
256 static void arm_set_current_function (tree
);
257 static bool arm_can_inline_p (tree
, tree
);
258 static void arm_relayout_function (tree
);
259 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
260 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
261 static bool arm_sched_can_speculate_insn (rtx_insn
*);
262 static bool arm_macro_fusion_p (void);
263 static bool arm_cannot_copy_insn_p (rtx_insn
*);
264 static int arm_issue_rate (void);
265 static int arm_sched_variable_issue (FILE *, int, rtx_insn
*, int);
266 static int arm_first_cycle_multipass_dfa_lookahead (void);
267 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
268 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
269 static bool arm_output_addr_const_extra (FILE *, rtx
);
270 static bool arm_allocate_stack_slots_for_args (void);
271 static bool arm_warn_func_return (tree
);
272 static tree
arm_promoted_type (const_tree t
);
273 static bool arm_scalar_mode_supported_p (scalar_mode
);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx
, tree
, rtx
);
278 static rtx
arm_trampoline_adjust_address (rtx
);
279 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
283 static bool arm_array_mode_supported_p (machine_mode
,
284 unsigned HOST_WIDE_INT
);
285 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
286 static bool arm_class_likely_spilled_p (reg_class_t
);
287 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
288 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
293 static void arm_conditional_register_usage (void);
294 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
295 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
296 static unsigned int arm_autovectorize_vector_modes (vector_modes
*, bool);
297 static int arm_default_branch_cost (bool, bool);
298 static int arm_cortex_a5_branch_cost (bool, bool);
299 static int arm_cortex_m_branch_cost (bool, bool);
300 static int arm_cortex_m7_branch_cost (bool, bool);
302 static bool arm_vectorize_vec_perm_const (machine_mode
, machine_mode
, rtx
, rtx
,
303 rtx
, const vec_perm_indices
&);
305 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
307 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
309 int misalign ATTRIBUTE_UNUSED
);
311 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
312 bool op0_preserve_value
);
313 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
315 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
318 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
320 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
322 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
323 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode
);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
326 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
327 static HOST_WIDE_INT
arm_constant_alignment (const_tree
, HOST_WIDE_INT
);
328 static rtx_insn
*thumb1_md_asm_adjust (vec
<rtx
> &, vec
<rtx
> &,
330 vec
<const char *> &, vec
<rtx
> &,
331 HARD_REG_SET
&, location_t
);
332 static const char *arm_identify_fpu_from_isa (sbitmap
);
334 /* Table of machine attributes. */
335 static const attribute_spec arm_gnu_attributes
[] =
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
338 affects_type_identity, handler, exclude } */
339 /* Function calls made to this symbol must be done indirectly, because
340 it may lie outside of the 26 bit addressing range of a normal function
342 { "long_call", 0, 0, false, true, true, false, NULL
, NULL
},
343 /* Whereas these functions are always known to reside within the 26 bit
345 { "short_call", 0, 0, false, true, true, false, NULL
, NULL
},
346 /* Specify the procedure call conventions for a function. */
347 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute
,
349 /* Interrupt Service Routines have special prologue and epilogue requirements. */
350 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
352 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
354 { "naked", 0, 0, true, false, false, false,
355 arm_handle_fndecl_attribute
, NULL
},
357 /* ARM/PE has three new attributes:
359 dllexport - for exporting a function/variable that will live in a dll
360 dllimport - for importing a function/variable from a dll
362 Microsoft allows multiple declspecs in one __declspec, separating
363 them with spaces. We do NOT support this. Instead, use __declspec
366 { "dllimport", 0, 0, true, false, false, false, NULL
, NULL
},
367 { "dllexport", 0, 0, true, false, false, false, NULL
, NULL
},
368 { "interfacearm", 0, 0, true, false, false, false,
369 arm_handle_fndecl_attribute
, NULL
},
370 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
371 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute
,
373 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute
,
375 { "notshared", 0, 0, false, true, false, false,
376 arm_handle_notshared_attribute
, NULL
},
378 /* ARMv8-M Security Extensions support. */
379 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
380 arm_handle_cmse_nonsecure_entry
, NULL
},
381 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
382 arm_handle_cmse_nonsecure_call
, NULL
},
383 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL
, NULL
}
386 static const scoped_attribute_specs arm_gnu_attribute_table
=
388 "gnu", arm_gnu_attributes
391 static const scoped_attribute_specs
*const arm_attribute_table
[] =
393 &arm_gnu_attribute_table
396 /* Initialize the GCC target structure. */
397 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
398 #undef TARGET_MERGE_DECL_ATTRIBUTES
399 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
402 #undef TARGET_CHECK_BUILTIN_CALL
403 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
405 #undef TARGET_LEGITIMIZE_ADDRESS
406 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
408 #undef TARGET_ATTRIBUTE_TABLE
409 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
411 #undef TARGET_INSERT_ATTRIBUTES
412 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START arm_file_start
416 #undef TARGET_ASM_FILE_END
417 #define TARGET_ASM_FILE_END arm_file_end
419 #undef TARGET_ASM_ALIGNED_SI_OP
420 #define TARGET_ASM_ALIGNED_SI_OP NULL
421 #undef TARGET_ASM_INTEGER
422 #define TARGET_ASM_INTEGER arm_assemble_integer
424 #undef TARGET_PRINT_OPERAND
425 #define TARGET_PRINT_OPERAND arm_print_operand
426 #undef TARGET_PRINT_OPERAND_ADDRESS
427 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
428 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
429 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
431 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
432 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
434 #undef TARGET_ASM_FUNCTION_PROLOGUE
435 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
437 #undef TARGET_ASM_FUNCTION_EPILOGUE
438 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
440 #undef TARGET_CAN_INLINE_P
441 #define TARGET_CAN_INLINE_P arm_can_inline_p
443 #undef TARGET_RELAYOUT_FUNCTION
444 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
446 #undef TARGET_OPTION_OVERRIDE
447 #define TARGET_OPTION_OVERRIDE arm_option_override
449 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
450 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
452 #undef TARGET_OPTION_RESTORE
453 #define TARGET_OPTION_RESTORE arm_option_restore
455 #undef TARGET_OPTION_PRINT
456 #define TARGET_OPTION_PRINT arm_option_print
458 #undef TARGET_COMP_TYPE_ATTRIBUTES
459 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
461 #undef TARGET_SCHED_CAN_SPECULATE_INSN
462 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
464 #undef TARGET_SCHED_MACRO_FUSION_P
465 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
467 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
468 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
470 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
471 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
473 #undef TARGET_SCHED_ADJUST_COST
474 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
476 #undef TARGET_SET_CURRENT_FUNCTION
477 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
479 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
480 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
482 #undef TARGET_SCHED_REORDER
483 #define TARGET_SCHED_REORDER arm_sched_reorder
485 #undef TARGET_REGISTER_MOVE_COST
486 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
488 #undef TARGET_MEMORY_MOVE_COST
489 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
491 #undef TARGET_ENCODE_SECTION_INFO
493 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
495 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
501 #undef TARGET_ASM_INTERNAL_LABEL
502 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
504 #undef TARGET_FLOATN_MODE
505 #define TARGET_FLOATN_MODE arm_floatn_mode
507 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
508 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
510 #undef TARGET_FUNCTION_VALUE
511 #define TARGET_FUNCTION_VALUE arm_function_value
513 #undef TARGET_LIBCALL_VALUE
514 #define TARGET_LIBCALL_VALUE arm_libcall_value
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
519 #undef TARGET_GIMPLE_FOLD_BUILTIN
520 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
522 #undef TARGET_ASM_OUTPUT_MI_THUNK
523 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
524 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
525 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
527 #undef TARGET_RTX_COSTS
528 #define TARGET_RTX_COSTS arm_rtx_costs
529 #undef TARGET_ADDRESS_COST
530 #define TARGET_ADDRESS_COST arm_address_cost
531 #undef TARGET_INSN_COST
532 #define TARGET_INSN_COST arm_insn_cost
534 #undef TARGET_SHIFT_TRUNCATION_MASK
535 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
537 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
538 #undef TARGET_ARRAY_MODE_SUPPORTED_P
539 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
540 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
541 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
544 arm_autovectorize_vector_modes
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
549 #undef TARGET_INIT_BUILTINS
550 #define TARGET_INIT_BUILTINS arm_init_builtins
551 #undef TARGET_EXPAND_BUILTIN
552 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
553 #undef TARGET_BUILTIN_DECL
554 #define TARGET_BUILTIN_DECL arm_builtin_decl
556 #undef TARGET_INIT_LIBFUNCS
557 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
561 #undef TARGET_PROMOTE_PROTOTYPES
562 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
565 #undef TARGET_ARG_PARTIAL_BYTES
566 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
567 #undef TARGET_FUNCTION_ARG
568 #define TARGET_FUNCTION_ARG arm_function_arg
569 #undef TARGET_FUNCTION_ARG_ADVANCE
570 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
571 #undef TARGET_FUNCTION_ARG_PADDING
572 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
573 #undef TARGET_FUNCTION_ARG_BOUNDARY
574 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
576 #undef TARGET_SETUP_INCOMING_VARARGS
577 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
579 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
580 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
582 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
583 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
584 #undef TARGET_TRAMPOLINE_INIT
585 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
586 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
587 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
589 #undef TARGET_WARN_FUNC_RETURN
590 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
592 #undef TARGET_DEFAULT_SHORT_ENUMS
593 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
595 #undef TARGET_ALIGN_ANON_BITFIELD
596 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
598 #undef TARGET_NARROW_VOLATILE_BITFIELD
599 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
601 #undef TARGET_CXX_GUARD_TYPE
602 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
604 #undef TARGET_CXX_GUARD_MASK_BIT
605 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
607 #undef TARGET_CXX_GET_COOKIE_SIZE
608 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
610 #undef TARGET_CXX_COOKIE_HAS_SIZE
611 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
613 #undef TARGET_CXX_CDTOR_RETURNS_THIS
614 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
616 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
617 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
619 #undef TARGET_CXX_USE_AEABI_ATEXIT
620 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
622 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
623 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
624 arm_cxx_determine_class_data_visibility
626 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
627 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
629 #undef TARGET_RETURN_IN_MSB
630 #define TARGET_RETURN_IN_MSB arm_return_in_msb
632 #undef TARGET_RETURN_IN_MEMORY
633 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
635 #undef TARGET_MUST_PASS_IN_STACK
636 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
639 #undef TARGET_ASM_UNWIND_EMIT
640 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
642 /* EABI unwinding tables use a different format for the typeinfo tables. */
643 #undef TARGET_ASM_TTYPE
644 #define TARGET_ASM_TTYPE arm_output_ttype
646 #undef TARGET_ARM_EABI_UNWINDER
647 #define TARGET_ARM_EABI_UNWINDER true
649 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
650 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
652 #endif /* ARM_UNWIND_INFO */
654 #undef TARGET_ASM_INIT_SECTIONS
655 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
657 #undef TARGET_DWARF_REGISTER_SPAN
658 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
660 #undef TARGET_CANNOT_COPY_INSN_P
661 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
664 #undef TARGET_HAVE_TLS
665 #define TARGET_HAVE_TLS true
668 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
669 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
671 #undef TARGET_LEGITIMATE_CONSTANT_P
672 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
674 #undef TARGET_CANNOT_FORCE_CONST_MEM
675 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
677 #undef TARGET_MAX_ANCHOR_OFFSET
678 #define TARGET_MAX_ANCHOR_OFFSET 4095
680 /* The minimum is set such that the total size of the block
681 for a particular anchor is -4088 + 1 + 4095 bytes, which is
682 divisible by eight, ensuring natural spacing of anchors. */
683 #undef TARGET_MIN_ANCHOR_OFFSET
684 #define TARGET_MIN_ANCHOR_OFFSET -4088
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
689 #undef TARGET_SCHED_VARIABLE_ISSUE
690 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
692 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
693 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
694 arm_first_cycle_multipass_dfa_lookahead
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
698 arm_first_cycle_multipass_dfa_lookahead_guard
700 #undef TARGET_MANGLE_TYPE
701 #define TARGET_MANGLE_TYPE arm_mangle_type
703 #undef TARGET_INVALID_CONVERSION
704 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
706 #undef TARGET_INVALID_UNARY_OP
707 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
709 #undef TARGET_INVALID_BINARY_OP
710 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
712 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
713 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
715 #undef TARGET_BUILD_BUILTIN_VA_LIST
716 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
717 #undef TARGET_EXPAND_BUILTIN_VA_START
718 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
719 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
720 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
723 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
724 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
727 #undef TARGET_LEGITIMATE_ADDRESS_P
728 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
730 #undef TARGET_PREFERRED_RELOAD_CLASS
731 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
733 #undef TARGET_PROMOTED_TYPE
734 #define TARGET_PROMOTED_TYPE arm_promoted_type
736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
737 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
739 #undef TARGET_COMPUTE_FRAME_LAYOUT
740 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
742 #undef TARGET_FRAME_POINTER_REQUIRED
743 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
745 #undef TARGET_CAN_ELIMINATE
746 #define TARGET_CAN_ELIMINATE arm_can_eliminate
748 #undef TARGET_CONDITIONAL_REGISTER_USAGE
749 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
751 #undef TARGET_CLASS_LIKELY_SPILLED_P
752 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
754 #undef TARGET_VECTORIZE_BUILTINS
755 #define TARGET_VECTORIZE_BUILTINS
757 #undef TARGET_VECTOR_ALIGNMENT
758 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
760 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
761 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
762 arm_vector_alignment_reachable
764 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
765 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
766 arm_builtin_support_vector_misalignment
768 #undef TARGET_PREFERRED_RENAME_CLASS
769 #define TARGET_PREFERRED_RENAME_CLASS \
770 arm_preferred_rename_class
/* Overrides of the default target hooks: each #undef/#define pair replaces
   the generic default with the ARM-specific implementation that is picked
   up by the TARGET_* hook table in target-def.h (included above).  */
772 #undef TARGET_VECTORIZE_VEC_PERM_CONST
773 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
777 arm_builtin_vectorization_cost
779 #undef TARGET_CANONICALIZE_COMPARISON
780 #define TARGET_CANONICALIZE_COMPARISON \
781 arm_canonicalize_comparison
783 #undef TARGET_ASAN_SHADOW_OFFSET
784 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
786 #undef MAX_INSN_PER_IT_BLOCK
787 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
789 #undef TARGET_CAN_USE_DOLOOP_P
790 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
792 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
793 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
795 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
796 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
798 #undef TARGET_SCHED_FUSION_PRIORITY
799 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
801 #undef TARGET_ASM_FUNCTION_SECTION
802 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
804 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
805 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
807 #undef TARGET_SECTION_TYPE_FLAGS
808 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
810 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
811 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
813 #undef TARGET_C_EXCESS_PRECISION
814 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
816 /* Although the architecture reserves bits 0 and 1, only the former is
817 used for ARM/Thumb ISA selection in v7 and earlier versions. */
818 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
819 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
821 #undef TARGET_FIXED_CONDITION_CODE_REGS
822 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
824 #undef TARGET_HARD_REGNO_NREGS
825 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
826 #undef TARGET_HARD_REGNO_MODE_OK
827 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
829 #undef TARGET_MODES_TIEABLE_P
830 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
832 #undef TARGET_CAN_CHANGE_MODE_CLASS
833 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
835 #undef TARGET_CONSTANT_ALIGNMENT
836 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
838 #undef TARGET_INVALID_WITHIN_DOLOOP
839 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
841 #undef TARGET_MD_ASM_ADJUST
842 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
847 #undef TARGET_VECTORIZE_GET_MASK_MODE
848 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
850 /* Obstack for minipool constant handling. */
851 static struct obstack minipool_obstack
;
852 static char * minipool_startobj
;
854 /* The maximum number of insns skipped which
855 will be conditionalised if possible. */
856 static int max_insns_skipped
= 5;
858 /* True if we are currently building a constant table. */
859 int making_const_table
;
861 /* The processor for which instructions should be scheduled. */
862 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
864 /* The current tuning set. */
865 const struct tune_params
*current_tune
;
867 /* Which floating point hardware to schedule for. */
870 /* Used for Thumb call_via trampolines. */
871 rtx thumb_call_via_label
[14];
872 static int thumb_call_reg_needed
;
874 /* The bits in this mask specify which instruction scheduling options should
876 unsigned int tune_flags
= 0;
878 /* The highest ARM architecture version supported by the
880 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
882 /* Active target architecture and tuning. */
884 struct arm_build_target arm_active_target
;
886 /* The following are used in the arm.md file as equivalents to bits
887 in the above two flag variables. */
889 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
892 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
895 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
898 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
901 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
904 /* Nonzero if this chip supports the ARM 6K extensions. */
907 /* Nonzero if this chip supports the ARM 6KZ extensions. */
910 /* Nonzero if instructions present in ARMv6-M can be used. */
913 /* Nonzero if this chip supports the ARM 7 extensions. */
916 /* Nonzero if this chip supports the Large Physical Address Extension. */
917 int arm_arch_lpae
= 0;
919 /* Nonzero if instructions not present in the 'M' profile can be used. */
920 int arm_arch_notm
= 0;
922 /* Nonzero if instructions present in ARMv7E-M can be used. */
925 /* Nonzero if instructions present in ARMv8 can be used. */
928 /* Nonzero if this chip supports the ARMv8.1 extensions. */
931 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
934 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
937 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
940 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
942 int arm_arch8m_main
= 0;
944 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
946 int arm_arch8_1m_main
= 0;
948 /* Nonzero if this chip supports the FP16 instructions extension of ARM
950 int arm_fp16_inst
= 0;
952 /* Nonzero if this chip can benefit from load scheduling. */
953 int arm_ld_sched
= 0;
955 /* Nonzero if this chip is a StrongARM. */
956 int arm_tune_strongarm
= 0;
958 /* Nonzero if this chip supports Intel Wireless MMX technology. */
959 int arm_arch_iwmmxt
= 0;
961 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
962 int arm_arch_iwmmxt2
= 0;
964 /* Nonzero if this chip is an XScale. */
965 int arm_arch_xscale
= 0;
967 /* Nonzero if tuning for XScale */
968 int arm_tune_xscale
= 0;
970 /* Nonzero if we want to tune for stores that access the write-buffer.
971 This typically means an ARM6 or ARM7 with MMU or MPU. */
972 int arm_tune_wbuf
= 0;
974 /* Nonzero if tuning for Cortex-A9. */
975 int arm_tune_cortex_a9
= 0;
977 /* Nonzero if we should define __THUMB_INTERWORK__ in the
979 XXX This is a bit of a hack, it's intended to help work around
980 problems in GLD which doesn't understand that armv5t code is
981 interworking clean. */
982 int arm_cpp_interwork
= 0;
984 /* Nonzero if chip supports Thumb 1. */
987 /* Nonzero if chip supports Thumb 2. */
990 /* Nonzero if chip supports integer division instruction. */
991 int arm_arch_arm_hwdiv
;
992 int arm_arch_thumb_hwdiv
;
994 /* Nonzero if chip disallows volatile memory access in IT block. */
995 int arm_arch_no_volatile_ce
;
997 /* Nonzero if we shouldn't use literal pools. */
998 bool arm_disable_literal_pool
= false;
1000 /* The register number to be used for the PIC offset register. */
1001 unsigned arm_pic_register
= INVALID_REGNUM
;
1003 enum arm_pcs arm_pcs_default
;
1005 /* For an explanation of these variables, see final_prescan_insn below. */
1006 int arm_ccfsm_state
;
1007 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1008 enum arm_cond_code arm_current_cc
;
1010 rtx arm_target_insn
;
1011 int arm_target_label
;
1012 /* The number of conditionally executed insns, including the current insn. */
1013 int arm_condexec_count
= 0;
1014 /* A bitmask specifying the patterns for the IT block.
1015 Zero means do not output an IT block before this insn. */
1016 int arm_condexec_mask
= 0;
1017 /* The number of bits used in arm_condexec_mask. */
1018 int arm_condexec_masklen
= 0;
1020 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1021 int arm_arch_crc
= 0;
1023 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1024 int arm_arch_dotprod
= 0;
1026 /* Nonzero if chip supports the ARMv8-M security extensions. */
1027 int arm_arch_cmse
= 0;
1029 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1030 int arm_m_profile_small_mul
= 0;
1032 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1033 int arm_arch_i8mm
= 0;
1035 /* Nonzero if chip supports the BFloat16 instructions. */
1036 int arm_arch_bf16
= 0;
1038 /* Nonzero if chip supports the Custom Datapath Extension. */
1039 int arm_arch_cde
= 0;
1040 int arm_arch_cde_coproc
= 0;
1041 const int arm_arch_cde_coproc_bits
[] = {
1042 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1045 /* The condition codes of the ARM, and the inverse function. */
1046 static const char * const arm_condition_codes
[] =
1048 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1049 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1052 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1053 int arm_regs_in_sequence
[] =
1055 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1058 #define DEF_FP_SYSREG(reg) #reg,
1059 const char *fp_sysreg_names
[NB_FP_SYSREGS
] = {
1062 #undef DEF_FP_SYSREG
/* Assembler mnemonic emitted for a logical shift left.  */
1064 #define ARM_LSL_NAME "lsl"
/* Nonzero iff the two NUL-terminated strings compare equal.  */
1065 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1067 #define THUMB2_WORK_REGS \
1068 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1069 | (1 << SP_REGNUM) \
1070 | (1 << PC_REGNUM) \
1071 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1072 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1075 /* Initialization code. */
1079 enum processor_type scheduler
;
1080 unsigned int tune_flags
;
1081 const struct tune_params
*tune
;
1084 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1085 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1092 /* arm generic vectorizer costs. */
1094 struct cpu_vec_costs arm_default_vec_cost
= {
1095 1, /* scalar_stmt_cost. */
1096 1, /* scalar load_cost. */
1097 1, /* scalar_store_cost. */
1098 1, /* vec_stmt_cost. */
1099 1, /* vec_to_scalar_cost. */
1100 1, /* scalar_to_vec_cost. */
1101 1, /* vec_align_load_cost. */
1102 1, /* vec_unalign_load_cost. */
1103 1, /* vec_unalign_store_cost. */
1104 1, /* vec_store_cost. */
1105 3, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1109 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1110 #include "aarch-cost-tables.h"
1114 const struct cpu_cost_table cortexa9_extra_costs
=
1121 COSTS_N_INSNS (1), /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 COSTS_N_INSNS (2), /* arith_shift_reg. */
1125 COSTS_N_INSNS (1), /* log_shift_reg. */
1126 COSTS_N_INSNS (1), /* extend. */
1127 COSTS_N_INSNS (2), /* extend_arith. */
1128 COSTS_N_INSNS (1), /* bfi. */
1129 COSTS_N_INSNS (1), /* bfx. */
1133 true /* non_exec_costs_exec. */
1138 COSTS_N_INSNS (3), /* simple. */
1139 COSTS_N_INSNS (3), /* flag_setting. */
1140 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (3), /* add. */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (4), /* extend. */
1151 COSTS_N_INSNS (4), /* extend_add. */
1157 COSTS_N_INSNS (2), /* load. */
1158 COSTS_N_INSNS (2), /* load_sign_extend. */
1159 COSTS_N_INSNS (2), /* ldrd. */
1160 COSTS_N_INSNS (2), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (5), /* loadf. */
1164 COSTS_N_INSNS (5), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (2), /* store. */
1167 COSTS_N_INSNS (2), /* strd. */
1168 COSTS_N_INSNS (2), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1180 COSTS_N_INSNS (14), /* div. */
1181 COSTS_N_INSNS (4), /* mult. */
1182 COSTS_N_INSNS (7), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1196 COSTS_N_INSNS (24), /* div. */
1197 COSTS_N_INSNS (5), /* mult. */
1198 COSTS_N_INSNS (8), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (3), /* addsub. */
1201 COSTS_N_INSNS (1), /* fpconst. */
1202 COSTS_N_INSNS (1), /* neg. */
1203 COSTS_N_INSNS (3), /* compare. */
1204 COSTS_N_INSNS (3), /* widen. */
1205 COSTS_N_INSNS (3), /* narrow. */
1206 COSTS_N_INSNS (3), /* toint. */
1207 COSTS_N_INSNS (3), /* fromint. */
1208 COSTS_N_INSNS (3) /* roundint. */
1213 COSTS_N_INSNS (1), /* alu. */
1214 COSTS_N_INSNS (4), /* mult. */
1215 COSTS_N_INSNS (1), /* movi. */
1216 COSTS_N_INSNS (2), /* dup. */
1217 COSTS_N_INSNS (2) /* extract. */
1221 const struct cpu_cost_table cortexa8_extra_costs
=
1227 COSTS_N_INSNS (1), /* shift. */
1229 COSTS_N_INSNS (1), /* arith_shift. */
1230 0, /* arith_shift_reg. */
1231 COSTS_N_INSNS (1), /* log_shift. */
1232 0, /* log_shift_reg. */
1234 0, /* extend_arith. */
1240 true /* non_exec_costs_exec. */
1245 COSTS_N_INSNS (1), /* simple. */
1246 COSTS_N_INSNS (1), /* flag_setting. */
1247 COSTS_N_INSNS (1), /* extend. */
1248 COSTS_N_INSNS (1), /* add. */
1249 COSTS_N_INSNS (1), /* extend_add. */
1250 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1254 0, /* simple (N/A). */
1255 0, /* flag_setting (N/A). */
1256 COSTS_N_INSNS (2), /* extend. */
1258 COSTS_N_INSNS (2), /* extend_add. */
1264 COSTS_N_INSNS (1), /* load. */
1265 COSTS_N_INSNS (1), /* load_sign_extend. */
1266 COSTS_N_INSNS (1), /* ldrd. */
1267 COSTS_N_INSNS (1), /* ldm_1st. */
1268 1, /* ldm_regs_per_insn_1st. */
1269 2, /* ldm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* loadf. */
1271 COSTS_N_INSNS (1), /* loadd. */
1272 COSTS_N_INSNS (1), /* load_unaligned. */
1273 COSTS_N_INSNS (1), /* store. */
1274 COSTS_N_INSNS (1), /* strd. */
1275 COSTS_N_INSNS (1), /* stm_1st. */
1276 1, /* stm_regs_per_insn_1st. */
1277 2, /* stm_regs_per_insn_subsequent. */
1278 COSTS_N_INSNS (1), /* storef. */
1279 COSTS_N_INSNS (1), /* stored. */
1280 COSTS_N_INSNS (1), /* store_unaligned. */
1281 COSTS_N_INSNS (1), /* loadv. */
1282 COSTS_N_INSNS (1) /* storev. */
1287 COSTS_N_INSNS (36), /* div. */
1288 COSTS_N_INSNS (11), /* mult. */
1289 COSTS_N_INSNS (20), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (4), /* widen. */
1296 COSTS_N_INSNS (4), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1303 COSTS_N_INSNS (64), /* div. */
1304 COSTS_N_INSNS (16), /* mult. */
1305 COSTS_N_INSNS (25), /* mult_addsub. */
1306 COSTS_N_INSNS (30), /* fma. */
1307 COSTS_N_INSNS (9), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (6), /* compare. */
1311 COSTS_N_INSNS (6), /* widen. */
1312 COSTS_N_INSNS (6), /* narrow. */
1313 COSTS_N_INSNS (8), /* toint. */
1314 COSTS_N_INSNS (8), /* fromint. */
1315 COSTS_N_INSNS (8) /* roundint. */
1320 COSTS_N_INSNS (1), /* alu. */
1321 COSTS_N_INSNS (4), /* mult. */
1322 COSTS_N_INSNS (1), /* movi. */
1323 COSTS_N_INSNS (2), /* dup. */
1324 COSTS_N_INSNS (2) /* extract. */
1328 const struct cpu_cost_table cortexa5_extra_costs
=
1334 COSTS_N_INSNS (1), /* shift. */
1335 COSTS_N_INSNS (1), /* shift_reg. */
1336 COSTS_N_INSNS (1), /* arith_shift. */
1337 COSTS_N_INSNS (1), /* arith_shift_reg. */
1338 COSTS_N_INSNS (1), /* log_shift. */
1339 COSTS_N_INSNS (1), /* log_shift_reg. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* extend_arith. */
1342 COSTS_N_INSNS (1), /* bfi. */
1343 COSTS_N_INSNS (1), /* bfx. */
1344 COSTS_N_INSNS (1), /* clz. */
1345 COSTS_N_INSNS (1), /* rev. */
1347 true /* non_exec_costs_exec. */
1354 COSTS_N_INSNS (1), /* flag_setting. */
1355 COSTS_N_INSNS (1), /* extend. */
1356 COSTS_N_INSNS (1), /* add. */
1357 COSTS_N_INSNS (1), /* extend_add. */
1358 COSTS_N_INSNS (7) /* idiv. */
1362 0, /* simple (N/A). */
1363 0, /* flag_setting (N/A). */
1364 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (2), /* extend_add. */
1372 COSTS_N_INSNS (1), /* load. */
1373 COSTS_N_INSNS (1), /* load_sign_extend. */
1374 COSTS_N_INSNS (6), /* ldrd. */
1375 COSTS_N_INSNS (1), /* ldm_1st. */
1376 1, /* ldm_regs_per_insn_1st. */
1377 2, /* ldm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* loadf. */
1379 COSTS_N_INSNS (4), /* loadd. */
1380 COSTS_N_INSNS (1), /* load_unaligned. */
1381 COSTS_N_INSNS (1), /* store. */
1382 COSTS_N_INSNS (3), /* strd. */
1383 COSTS_N_INSNS (1), /* stm_1st. */
1384 1, /* stm_regs_per_insn_1st. */
1385 2, /* stm_regs_per_insn_subsequent. */
1386 COSTS_N_INSNS (2), /* storef. */
1387 COSTS_N_INSNS (2), /* stored. */
1388 COSTS_N_INSNS (1), /* store_unaligned. */
1389 COSTS_N_INSNS (1), /* loadv. */
1390 COSTS_N_INSNS (1) /* storev. */
1395 COSTS_N_INSNS (15), /* div. */
1396 COSTS_N_INSNS (3), /* mult. */
1397 COSTS_N_INSNS (7), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1411 COSTS_N_INSNS (30), /* div. */
1412 COSTS_N_INSNS (6), /* mult. */
1413 COSTS_N_INSNS (10), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1428 COSTS_N_INSNS (1), /* alu. */
1429 COSTS_N_INSNS (4), /* mult. */
1430 COSTS_N_INSNS (1), /* movi. */
1431 COSTS_N_INSNS (2), /* dup. */
1432 COSTS_N_INSNS (2) /* extract. */
1437 const struct cpu_cost_table cortexa7_extra_costs
=
1443 COSTS_N_INSNS (1), /* shift. */
1444 COSTS_N_INSNS (1), /* shift_reg. */
1445 COSTS_N_INSNS (1), /* arith_shift. */
1446 COSTS_N_INSNS (1), /* arith_shift_reg. */
1447 COSTS_N_INSNS (1), /* log_shift. */
1448 COSTS_N_INSNS (1), /* log_shift_reg. */
1449 COSTS_N_INSNS (1), /* extend. */
1450 COSTS_N_INSNS (1), /* extend_arith. */
1451 COSTS_N_INSNS (1), /* bfi. */
1452 COSTS_N_INSNS (1), /* bfx. */
1453 COSTS_N_INSNS (1), /* clz. */
1454 COSTS_N_INSNS (1), /* rev. */
1456 true /* non_exec_costs_exec. */
1463 COSTS_N_INSNS (1), /* flag_setting. */
1464 COSTS_N_INSNS (1), /* extend. */
1465 COSTS_N_INSNS (1), /* add. */
1466 COSTS_N_INSNS (1), /* extend_add. */
1467 COSTS_N_INSNS (7) /* idiv. */
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (1), /* extend. */
1475 COSTS_N_INSNS (2), /* extend_add. */
1481 COSTS_N_INSNS (1), /* load. */
1482 COSTS_N_INSNS (1), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (1), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* loadf. */
1488 COSTS_N_INSNS (2), /* loadd. */
1489 COSTS_N_INSNS (1), /* load_unaligned. */
1490 COSTS_N_INSNS (1), /* store. */
1491 COSTS_N_INSNS (3), /* strd. */
1492 COSTS_N_INSNS (1), /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 COSTS_N_INSNS (1), /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1504 COSTS_N_INSNS (15), /* div. */
1505 COSTS_N_INSNS (3), /* mult. */
1506 COSTS_N_INSNS (7), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1520 COSTS_N_INSNS (30), /* div. */
1521 COSTS_N_INSNS (6), /* mult. */
1522 COSTS_N_INSNS (10), /* mult_addsub. */
1523 COSTS_N_INSNS (7), /* fma. */
1524 COSTS_N_INSNS (3), /* addsub. */
1525 COSTS_N_INSNS (3), /* fpconst. */
1526 COSTS_N_INSNS (3), /* neg. */
1527 COSTS_N_INSNS (3), /* compare. */
1528 COSTS_N_INSNS (3), /* widen. */
1529 COSTS_N_INSNS (3), /* narrow. */
1530 COSTS_N_INSNS (3), /* toint. */
1531 COSTS_N_INSNS (3), /* fromint. */
1532 COSTS_N_INSNS (3) /* roundint. */
1537 COSTS_N_INSNS (1), /* alu. */
1538 COSTS_N_INSNS (4), /* mult. */
1539 COSTS_N_INSNS (1), /* movi. */
1540 COSTS_N_INSNS (2), /* dup. */
1541 COSTS_N_INSNS (2) /* extract. */
1545 const struct cpu_cost_table cortexa12_extra_costs
=
1552 COSTS_N_INSNS (1), /* shift_reg. */
1553 COSTS_N_INSNS (1), /* arith_shift. */
1554 COSTS_N_INSNS (1), /* arith_shift_reg. */
1555 COSTS_N_INSNS (1), /* log_shift. */
1556 COSTS_N_INSNS (1), /* log_shift_reg. */
1558 COSTS_N_INSNS (1), /* extend_arith. */
1560 COSTS_N_INSNS (1), /* bfx. */
1561 COSTS_N_INSNS (1), /* clz. */
1562 COSTS_N_INSNS (1), /* rev. */
1564 true /* non_exec_costs_exec. */
1569 COSTS_N_INSNS (2), /* simple. */
1570 COSTS_N_INSNS (3), /* flag_setting. */
1571 COSTS_N_INSNS (2), /* extend. */
1572 COSTS_N_INSNS (3), /* add. */
1573 COSTS_N_INSNS (2), /* extend_add. */
1574 COSTS_N_INSNS (18) /* idiv. */
1578 0, /* simple (N/A). */
1579 0, /* flag_setting (N/A). */
1580 COSTS_N_INSNS (3), /* extend. */
1582 COSTS_N_INSNS (3), /* extend_add. */
1588 COSTS_N_INSNS (3), /* load. */
1589 COSTS_N_INSNS (3), /* load_sign_extend. */
1590 COSTS_N_INSNS (3), /* ldrd. */
1591 COSTS_N_INSNS (3), /* ldm_1st. */
1592 1, /* ldm_regs_per_insn_1st. */
1593 2, /* ldm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (3), /* loadf. */
1595 COSTS_N_INSNS (3), /* loadd. */
1596 0, /* load_unaligned. */
1600 1, /* stm_regs_per_insn_1st. */
1601 2, /* stm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (2), /* storef. */
1603 COSTS_N_INSNS (2), /* stored. */
1604 0, /* store_unaligned. */
1605 COSTS_N_INSNS (1), /* loadv. */
1606 COSTS_N_INSNS (1) /* storev. */
1611 COSTS_N_INSNS (17), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1627 COSTS_N_INSNS (31), /* div. */
1628 COSTS_N_INSNS (4), /* mult. */
1629 COSTS_N_INSNS (8), /* mult_addsub. */
1630 COSTS_N_INSNS (8), /* fma. */
1631 COSTS_N_INSNS (4), /* addsub. */
1632 COSTS_N_INSNS (2), /* fpconst. */
1633 COSTS_N_INSNS (2), /* neg. */
1634 COSTS_N_INSNS (2), /* compare. */
1635 COSTS_N_INSNS (4), /* widen. */
1636 COSTS_N_INSNS (4), /* narrow. */
1637 COSTS_N_INSNS (4), /* toint. */
1638 COSTS_N_INSNS (4), /* fromint. */
1639 COSTS_N_INSNS (4) /* roundint. */
1644 COSTS_N_INSNS (1), /* alu. */
1645 COSTS_N_INSNS (4), /* mult. */
1646 COSTS_N_INSNS (1), /* movi. */
1647 COSTS_N_INSNS (2), /* dup. */
1648 COSTS_N_INSNS (2) /* extract. */
1652 const struct cpu_cost_table cortexa15_extra_costs
=
1660 COSTS_N_INSNS (1), /* arith_shift. */
1661 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift. */
1663 COSTS_N_INSNS (1), /* log_shift_reg. */
1665 COSTS_N_INSNS (1), /* extend_arith. */
1666 COSTS_N_INSNS (1), /* bfi. */
1671 true /* non_exec_costs_exec. */
1676 COSTS_N_INSNS (2), /* simple. */
1677 COSTS_N_INSNS (3), /* flag_setting. */
1678 COSTS_N_INSNS (2), /* extend. */
1679 COSTS_N_INSNS (2), /* add. */
1680 COSTS_N_INSNS (2), /* extend_add. */
1681 COSTS_N_INSNS (18) /* idiv. */
1685 0, /* simple (N/A). */
1686 0, /* flag_setting (N/A). */
1687 COSTS_N_INSNS (3), /* extend. */
1689 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (3), /* load. */
1696 COSTS_N_INSNS (3), /* load_sign_extend. */
1697 COSTS_N_INSNS (3), /* ldrd. */
1698 COSTS_N_INSNS (4), /* ldm_1st. */
1699 1, /* ldm_regs_per_insn_1st. */
1700 2, /* ldm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (4), /* loadf. */
1702 COSTS_N_INSNS (4), /* loadd. */
1703 0, /* load_unaligned. */
1706 COSTS_N_INSNS (1), /* stm_1st. */
1707 1, /* stm_regs_per_insn_1st. */
1708 2, /* stm_regs_per_insn_subsequent. */
1711 0, /* store_unaligned. */
1712 COSTS_N_INSNS (1), /* loadv. */
1713 COSTS_N_INSNS (1) /* storev. */
1718 COSTS_N_INSNS (17), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (5), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1734 COSTS_N_INSNS (31), /* div. */
1735 COSTS_N_INSNS (4), /* mult. */
1736 COSTS_N_INSNS (8), /* mult_addsub. */
1737 COSTS_N_INSNS (8), /* fma. */
1738 COSTS_N_INSNS (4), /* addsub. */
1739 COSTS_N_INSNS (2), /* fpconst. */
1740 COSTS_N_INSNS (2), /* neg. */
1741 COSTS_N_INSNS (2), /* compare. */
1742 COSTS_N_INSNS (4), /* widen. */
1743 COSTS_N_INSNS (4), /* narrow. */
1744 COSTS_N_INSNS (4), /* toint. */
1745 COSTS_N_INSNS (4), /* fromint. */
1746 COSTS_N_INSNS (4) /* roundint. */
1751 COSTS_N_INSNS (1), /* alu. */
1752 COSTS_N_INSNS (4), /* mult. */
1753 COSTS_N_INSNS (1), /* movi. */
1754 COSTS_N_INSNS (2), /* dup. */
1755 COSTS_N_INSNS (2) /* extract. */
1759 const struct cpu_cost_table v7m_extra_costs
=
1767 0, /* arith_shift. */
1768 COSTS_N_INSNS (1), /* arith_shift_reg. */
1770 COSTS_N_INSNS (1), /* log_shift_reg. */
1772 COSTS_N_INSNS (1), /* extend_arith. */
1777 COSTS_N_INSNS (1), /* non_exec. */
1778 false /* non_exec_costs_exec. */
1783 COSTS_N_INSNS (1), /* simple. */
1784 COSTS_N_INSNS (1), /* flag_setting. */
1785 COSTS_N_INSNS (2), /* extend. */
1786 COSTS_N_INSNS (1), /* add. */
1787 COSTS_N_INSNS (3), /* extend_add. */
1788 COSTS_N_INSNS (8) /* idiv. */
1792 0, /* simple (N/A). */
1793 0, /* flag_setting (N/A). */
1794 COSTS_N_INSNS (2), /* extend. */
1796 COSTS_N_INSNS (3), /* extend_add. */
1802 COSTS_N_INSNS (2), /* load. */
1803 0, /* load_sign_extend. */
1804 COSTS_N_INSNS (3), /* ldrd. */
1805 COSTS_N_INSNS (2), /* ldm_1st. */
1806 1, /* ldm_regs_per_insn_1st. */
1807 1, /* ldm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* loadf. */
1809 COSTS_N_INSNS (3), /* loadd. */
1810 COSTS_N_INSNS (1), /* load_unaligned. */
1811 COSTS_N_INSNS (2), /* store. */
1812 COSTS_N_INSNS (3), /* strd. */
1813 COSTS_N_INSNS (2), /* stm_1st. */
1814 1, /* stm_regs_per_insn_1st. */
1815 1, /* stm_regs_per_insn_subsequent. */
1816 COSTS_N_INSNS (2), /* storef. */
1817 COSTS_N_INSNS (3), /* stored. */
1818 COSTS_N_INSNS (1), /* store_unaligned. */
1819 COSTS_N_INSNS (1), /* loadv. */
1820 COSTS_N_INSNS (1) /* storev. */
1825 COSTS_N_INSNS (7), /* div. */
1826 COSTS_N_INSNS (2), /* mult. */
1827 COSTS_N_INSNS (5), /* mult_addsub. */
1828 COSTS_N_INSNS (3), /* fma. */
1829 COSTS_N_INSNS (1), /* addsub. */
1841 COSTS_N_INSNS (15), /* div. */
1842 COSTS_N_INSNS (5), /* mult. */
1843 COSTS_N_INSNS (7), /* mult_addsub. */
1844 COSTS_N_INSNS (7), /* fma. */
1845 COSTS_N_INSNS (3), /* addsub. */
1858 COSTS_N_INSNS (1), /* alu. */
1859 COSTS_N_INSNS (4), /* mult. */
1860 COSTS_N_INSNS (1), /* movi. */
1861 COSTS_N_INSNS (2), /* dup. */
1862 COSTS_N_INSNS (2) /* extract. */
1866 const struct addr_mode_cost_table generic_addr_mode_costs
=
1870 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1871 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1872 COSTS_N_INSNS (0) /* AMO_WB. */
1876 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1877 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1878 COSTS_N_INSNS (0) /* AMO_WB. */
1882 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1883 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1884 COSTS_N_INSNS (0) /* AMO_WB. */
1888 const struct tune_params arm_slowmul_tune
=
1890 &generic_extra_costs
, /* Insn extra costs. */
1891 &generic_addr_mode_costs
, /* Addressing mode costs. */
1892 NULL
, /* Sched adj cost. */
1893 arm_default_branch_cost
,
1894 &arm_default_vec_cost
,
1895 3, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL
,
1900 tune_params::PREF_CONST_POOL_TRUE
,
1901 tune_params::PREF_LDRD_FALSE
,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER
,
1905 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1906 tune_params::FUSE_NOTHING
,
1907 tune_params::SCHED_AUTOPREF_OFF
1910 const struct tune_params arm_fastmul_tune
=
1912 &generic_extra_costs
, /* Insn extra costs. */
1913 &generic_addr_mode_costs
, /* Addressing mode costs. */
1914 NULL
, /* Sched adj cost. */
1915 arm_default_branch_cost
,
1916 &arm_default_vec_cost
,
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 8, /* Memset max inline. */
1920 1, /* Issue rate. */
1921 ARM_PREFETCH_NOT_BENEFICIAL
,
1922 tune_params::PREF_CONST_POOL_TRUE
,
1923 tune_params::PREF_LDRD_FALSE
,
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1926 tune_params::DISPARAGE_FLAGS_NEITHER
,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1928 tune_params::FUSE_NOTHING
,
1929 tune_params::SCHED_AUTOPREF_OFF
1932 /* StrongARM has early execution of branches, so a sequence that is worth
1933 skipping is shorter. Set max_insns_skipped to a lower value. */
1935 const struct tune_params arm_strongarm_tune
=
1937 &generic_extra_costs
, /* Insn extra costs. */
1938 &generic_addr_mode_costs
, /* Addressing mode costs. */
1939 NULL
, /* Sched adj cost. */
1940 arm_default_branch_cost
,
1941 &arm_default_vec_cost
,
1942 1, /* Constant limit. */
1943 3, /* Max cond insns. */
1944 8, /* Memset max inline. */
1945 1, /* Issue rate. */
1946 ARM_PREFETCH_NOT_BENEFICIAL
,
1947 tune_params::PREF_CONST_POOL_TRUE
,
1948 tune_params::PREF_LDRD_FALSE
,
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1950 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1951 tune_params::DISPARAGE_FLAGS_NEITHER
,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1953 tune_params::FUSE_NOTHING
,
1954 tune_params::SCHED_AUTOPREF_OFF
1957 const struct tune_params arm_xscale_tune
=
1959 &generic_extra_costs
, /* Insn extra costs. */
1960 &generic_addr_mode_costs
, /* Addressing mode costs. */
1961 xscale_sched_adjust_cost
,
1962 arm_default_branch_cost
,
1963 &arm_default_vec_cost
,
1964 2, /* Constant limit. */
1965 3, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL
,
1969 tune_params::PREF_CONST_POOL_TRUE
,
1970 tune_params::PREF_LDRD_FALSE
,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER
,
1974 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1975 tune_params::FUSE_NOTHING
,
1976 tune_params::SCHED_AUTOPREF_OFF
1979 const struct tune_params arm_9e_tune
=
1981 &generic_extra_costs
, /* Insn extra costs. */
1982 &generic_addr_mode_costs
, /* Addressing mode costs. */
1983 NULL
, /* Sched adj cost. */
1984 arm_default_branch_cost
,
1985 &arm_default_vec_cost
,
1986 1, /* Constant limit. */
1987 5, /* Max cond insns. */
1988 8, /* Memset max inline. */
1989 1, /* Issue rate. */
1990 ARM_PREFETCH_NOT_BENEFICIAL
,
1991 tune_params::PREF_CONST_POOL_TRUE
,
1992 tune_params::PREF_LDRD_FALSE
,
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1995 tune_params::DISPARAGE_FLAGS_NEITHER
,
1996 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1997 tune_params::FUSE_NOTHING
,
1998 tune_params::SCHED_AUTOPREF_OFF
2001 const struct tune_params arm_marvell_pj4_tune
=
2003 &generic_extra_costs
, /* Insn extra costs. */
2004 &generic_addr_mode_costs
, /* Addressing mode costs. */
2005 NULL
, /* Sched adj cost. */
2006 arm_default_branch_cost
,
2007 &arm_default_vec_cost
,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL
,
2013 tune_params::PREF_CONST_POOL_TRUE
,
2014 tune_params::PREF_LDRD_FALSE
,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER
,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2019 tune_params::FUSE_NOTHING
,
2020 tune_params::SCHED_AUTOPREF_OFF
2023 const struct tune_params arm_v6t2_tune
=
2025 &generic_extra_costs
, /* Insn extra costs. */
2026 &generic_addr_mode_costs
, /* Addressing mode costs. */
2027 NULL
, /* Sched adj cost. */
2028 arm_default_branch_cost
,
2029 &arm_default_vec_cost
,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 1, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL
,
2035 tune_params::PREF_CONST_POOL_FALSE
,
2036 tune_params::PREF_LDRD_FALSE
,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER
,
2040 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2041 tune_params::FUSE_NOTHING
,
2042 tune_params::SCHED_AUTOPREF_OFF
2046 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2047 const struct tune_params arm_cortex_tune
=
2049 &generic_extra_costs
,
2050 &generic_addr_mode_costs
, /* Addressing mode costs. */
2051 NULL
, /* Sched adj cost. */
2052 arm_default_branch_cost
,
2053 &arm_default_vec_cost
,
2054 1, /* Constant limit. */
2055 5, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 2, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL
,
2059 tune_params::PREF_CONST_POOL_FALSE
,
2060 tune_params::PREF_LDRD_FALSE
,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_NEITHER
,
2064 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2065 tune_params::FUSE_NOTHING
,
2066 tune_params::SCHED_AUTOPREF_OFF
2069 const struct tune_params arm_cortex_a8_tune
=
2071 &cortexa8_extra_costs
,
2072 &generic_addr_mode_costs
, /* Addressing mode costs. */
2073 NULL
, /* Sched adj cost. */
2074 arm_default_branch_cost
,
2075 &arm_default_vec_cost
,
2076 1, /* Constant limit. */
2077 5, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 2, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL
,
2081 tune_params::PREF_CONST_POOL_FALSE
,
2082 tune_params::PREF_LDRD_FALSE
,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_NEITHER
,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2087 tune_params::FUSE_NOTHING
,
2088 tune_params::SCHED_AUTOPREF_OFF
2091 const struct tune_params arm_cortex_a7_tune
=
2093 &cortexa7_extra_costs
,
2094 &generic_addr_mode_costs
, /* Addressing mode costs. */
2095 NULL
, /* Sched adj cost. */
2096 arm_default_branch_cost
,
2097 &arm_default_vec_cost
,
2098 1, /* Constant limit. */
2099 5, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 2, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL
,
2103 tune_params::PREF_CONST_POOL_FALSE
,
2104 tune_params::PREF_LDRD_FALSE
,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER
,
2108 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2109 tune_params::FUSE_NOTHING
,
2110 tune_params::SCHED_AUTOPREF_OFF
2113 const struct tune_params arm_cortex_a15_tune
=
2115 &cortexa15_extra_costs
,
2116 &generic_addr_mode_costs
, /* Addressing mode costs. */
2117 NULL
, /* Sched adj cost. */
2118 arm_default_branch_cost
,
2119 &arm_default_vec_cost
,
2120 1, /* Constant limit. */
2121 2, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 3, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL
,
2125 tune_params::PREF_CONST_POOL_FALSE
,
2126 tune_params::PREF_LDRD_TRUE
,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_ALL
,
2130 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2131 tune_params::FUSE_NOTHING
,
2132 tune_params::SCHED_AUTOPREF_FULL
2135 const struct tune_params arm_cortex_a35_tune
=
2137 &cortexa53_extra_costs
,
2138 &generic_addr_mode_costs
, /* Addressing mode costs. */
2139 NULL
, /* Sched adj cost. */
2140 arm_default_branch_cost
,
2141 &arm_default_vec_cost
,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 1, /* Issue rate. */
2146 ARM_PREFETCH_NOT_BENEFICIAL
,
2147 tune_params::PREF_CONST_POOL_FALSE
,
2148 tune_params::PREF_LDRD_FALSE
,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER
,
2152 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2153 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2154 tune_params::SCHED_AUTOPREF_OFF
2157 const struct tune_params arm_cortex_a53_tune
=
2159 &cortexa53_extra_costs
,
2160 &generic_addr_mode_costs
, /* Addressing mode costs. */
2161 NULL
, /* Sched adj cost. */
2162 arm_default_branch_cost
,
2163 &arm_default_vec_cost
,
2164 1, /* Constant limit. */
2165 5, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL
,
2169 tune_params::PREF_CONST_POOL_FALSE
,
2170 tune_params::PREF_LDRD_FALSE
,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER
,
2174 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2175 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2176 tune_params::SCHED_AUTOPREF_OFF
2179 const struct tune_params arm_cortex_a57_tune
=
2181 &cortexa57_extra_costs
,
2182 &generic_addr_mode_costs
, /* addressing mode costs */
2183 NULL
, /* Sched adj cost. */
2184 arm_default_branch_cost
,
2185 &arm_default_vec_cost
,
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 3, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL
,
2191 tune_params::PREF_CONST_POOL_FALSE
,
2192 tune_params::PREF_LDRD_TRUE
,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL
,
2196 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2197 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2198 tune_params::SCHED_AUTOPREF_FULL
2201 const struct tune_params arm_exynosm1_tune
=
2203 &exynosm1_extra_costs
,
2204 &generic_addr_mode_costs
, /* Addressing mode costs. */
2205 NULL
, /* Sched adj cost. */
2206 arm_default_branch_cost
,
2207 &arm_default_vec_cost
,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 3, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL
,
2213 tune_params::PREF_CONST_POOL_FALSE
,
2214 tune_params::PREF_LDRD_TRUE
,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_ALL
,
2218 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2219 tune_params::FUSE_NOTHING
,
2220 tune_params::SCHED_AUTOPREF_OFF
2223 const struct tune_params arm_xgene1_tune
=
2225 &xgene1_extra_costs
,
2226 &generic_addr_mode_costs
, /* Addressing mode costs. */
2227 NULL
, /* Sched adj cost. */
2228 arm_default_branch_cost
,
2229 &arm_default_vec_cost
,
2230 1, /* Constant limit. */
2231 2, /* Max cond insns. */
2232 32, /* Memset max inline. */
2233 4, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL
,
2235 tune_params::PREF_CONST_POOL_FALSE
,
2236 tune_params::PREF_LDRD_TRUE
,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_ALL
,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2241 tune_params::FUSE_NOTHING
,
2242 tune_params::SCHED_AUTOPREF_OFF
2245 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2246 less appealing. Set max_insns_skipped to a low value. */
2248 const struct tune_params arm_cortex_a5_tune
=
2250 &cortexa5_extra_costs
,
2251 &generic_addr_mode_costs
, /* Addressing mode costs. */
2252 NULL
, /* Sched adj cost. */
2253 arm_cortex_a5_branch_cost
,
2254 &arm_default_vec_cost
,
2255 1, /* Constant limit. */
2256 1, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 2, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL
,
2260 tune_params::PREF_CONST_POOL_FALSE
,
2261 tune_params::PREF_LDRD_FALSE
,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER
,
2265 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2266 tune_params::FUSE_NOTHING
,
2267 tune_params::SCHED_AUTOPREF_OFF
2270 const struct tune_params arm_cortex_a9_tune
=
2272 &cortexa9_extra_costs
,
2273 &generic_addr_mode_costs
, /* Addressing mode costs. */
2274 cortex_a9_sched_adjust_cost
,
2275 arm_default_branch_cost
,
2276 &arm_default_vec_cost
,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_BENEFICIAL(4,32,32),
2282 tune_params::PREF_CONST_POOL_FALSE
,
2283 tune_params::PREF_LDRD_FALSE
,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER
,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2288 tune_params::FUSE_NOTHING
,
2289 tune_params::SCHED_AUTOPREF_OFF
2292 const struct tune_params arm_cortex_a12_tune
=
2294 &cortexa12_extra_costs
,
2295 &generic_addr_mode_costs
, /* Addressing mode costs. */
2296 NULL
, /* Sched adj cost. */
2297 arm_default_branch_cost
,
2298 &arm_default_vec_cost
, /* Vectorizer costs. */
2299 1, /* Constant limit. */
2300 2, /* Max cond insns. */
2301 8, /* Memset max inline. */
2302 2, /* Issue rate. */
2303 ARM_PREFETCH_NOT_BENEFICIAL
,
2304 tune_params::PREF_CONST_POOL_FALSE
,
2305 tune_params::PREF_LDRD_TRUE
,
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2308 tune_params::DISPARAGE_FLAGS_ALL
,
2309 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2310 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2311 tune_params::SCHED_AUTOPREF_OFF
2314 const struct tune_params arm_cortex_a73_tune
=
2316 &cortexa57_extra_costs
,
2317 &generic_addr_mode_costs
, /* Addressing mode costs. */
2318 NULL
, /* Sched adj cost. */
2319 arm_default_branch_cost
,
2320 &arm_default_vec_cost
, /* Vectorizer costs. */
2321 1, /* Constant limit. */
2322 2, /* Max cond insns. */
2323 8, /* Memset max inline. */
2324 2, /* Issue rate. */
2325 ARM_PREFETCH_NOT_BENEFICIAL
,
2326 tune_params::PREF_CONST_POOL_FALSE
,
2327 tune_params::PREF_LDRD_TRUE
,
2328 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2329 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2330 tune_params::DISPARAGE_FLAGS_ALL
,
2331 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2332 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2333 tune_params::SCHED_AUTOPREF_FULL
2336 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2337 cycle to execute each. An LDR from the constant pool also takes two cycles
2338 to execute, but mildly increases pipelining opportunity (consecutive
2339 loads/stores can be pipelined together, saving one cycle), and may also
2340 improve icache utilisation. Hence we prefer the constant pool for such
2343 const struct tune_params arm_v7m_tune
=
2346 &generic_addr_mode_costs
, /* Addressing mode costs. */
2347 NULL
, /* Sched adj cost. */
2348 arm_cortex_m_branch_cost
,
2349 &arm_default_vec_cost
,
2350 1, /* Constant limit. */
2351 2, /* Max cond insns. */
2352 8, /* Memset max inline. */
2353 1, /* Issue rate. */
2354 ARM_PREFETCH_NOT_BENEFICIAL
,
2355 tune_params::PREF_CONST_POOL_TRUE
,
2356 tune_params::PREF_LDRD_FALSE
,
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2358 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2359 tune_params::DISPARAGE_FLAGS_NEITHER
,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2361 tune_params::FUSE_NOTHING
,
2362 tune_params::SCHED_AUTOPREF_OFF
2365 /* Cortex-M7 tuning. */
2367 const struct tune_params arm_cortex_m7_tune
=
2370 &generic_addr_mode_costs
, /* Addressing mode costs. */
2371 NULL
, /* Sched adj cost. */
2372 arm_cortex_m7_branch_cost
,
2373 &arm_default_vec_cost
,
2374 0, /* Constant limit. */
2375 1, /* Max cond insns. */
2376 8, /* Memset max inline. */
2377 2, /* Issue rate. */
2378 ARM_PREFETCH_NOT_BENEFICIAL
,
2379 tune_params::PREF_CONST_POOL_TRUE
,
2380 tune_params::PREF_LDRD_FALSE
,
2381 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2383 tune_params::DISPARAGE_FLAGS_NEITHER
,
2384 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2385 tune_params::FUSE_NOTHING
,
2386 tune_params::SCHED_AUTOPREF_OFF
2389 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2390 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2392 const struct tune_params arm_v6m_tune
=
2394 &generic_extra_costs
, /* Insn extra costs. */
2395 &generic_addr_mode_costs
, /* Addressing mode costs. */
2396 NULL
, /* Sched adj cost. */
2397 arm_default_branch_cost
,
2398 &arm_default_vec_cost
, /* Vectorizer costs. */
2399 1, /* Constant limit. */
2400 5, /* Max cond insns. */
2401 8, /* Memset max inline. */
2402 1, /* Issue rate. */
2403 ARM_PREFETCH_NOT_BENEFICIAL
,
2404 tune_params::PREF_CONST_POOL_FALSE
,
2405 tune_params::PREF_LDRD_FALSE
,
2406 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2407 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2408 tune_params::DISPARAGE_FLAGS_NEITHER
,
2409 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2410 tune_params::FUSE_NOTHING
,
2411 tune_params::SCHED_AUTOPREF_OFF
2414 const struct tune_params arm_fa726te_tune
=
2416 &generic_extra_costs
, /* Insn extra costs. */
2417 &generic_addr_mode_costs
, /* Addressing mode costs. */
2418 fa726te_sched_adjust_cost
,
2419 arm_default_branch_cost
,
2420 &arm_default_vec_cost
,
2421 1, /* Constant limit. */
2422 5, /* Max cond insns. */
2423 8, /* Memset max inline. */
2424 2, /* Issue rate. */
2425 ARM_PREFETCH_NOT_BENEFICIAL
,
2426 tune_params::PREF_CONST_POOL_TRUE
,
2427 tune_params::PREF_LDRD_FALSE
,
2428 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2429 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2430 tune_params::DISPARAGE_FLAGS_NEITHER
,
2431 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2432 tune_params::FUSE_NOTHING
,
2433 tune_params::SCHED_AUTOPREF_OFF
2436 char *accepted_branch_protection_string
= NULL
;
2438 /* Auto-generated CPU, FPU and architecture tables. */
2439 #include "arm-cpu-data.h"
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2447 /* Supported TLS relocations. */
2458 TLS_DESCSEQ
/* GNU scheme */
2461 /* The maximum number of insns to be used when loading a constant. */
2463 arm_constant_limit (bool size_p
)
2465 return size_p
? 1 : current_tune
->constant_limit
;
2468 /* Emit an insn that's a simple single-set. Both the operands must be known
2470 inline static rtx_insn
*
2471 emit_set_insn (rtx x
, rtx y
)
2473 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick: each
   iteration clears the least-significant set bit, so the loop runs once
   per set bit.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2491 /* Return the number of bits set in BMAP. */
2493 bitmap_popcount (const sbitmap bmap
)
2495 unsigned int count
= 0;
2497 sbitmap_iterator sbi
;
2499 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2508 } arm_fixed_mode_set
;
2510 /* A small helper for setting fixed-point library libfuncs. */
2513 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2514 const char *funcname
, const char *modename
,
2519 if (num_suffix
== 0)
2520 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2522 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2524 set_optab_libfunc (optable
, mode
, buffer
);
2528 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2529 machine_mode from
, const char *funcname
,
2530 const char *toname
, const char *fromname
)
2533 const char *maybe_suffix_2
= "";
2535 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2536 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2537 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2538 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2539 maybe_suffix_2
= "2";
2541 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2544 set_conv_libfunc (optable
, to
, from
, buffer
);
2547 static GTY(()) rtx speculation_barrier_libfunc
;
2549 /* Record that we have no arithmetic or comparison libfuncs for
2550 machine mode MODE. */
2553 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode
)
2556 set_optab_libfunc (add_optab
, mode
, NULL
);
2557 set_optab_libfunc (sdiv_optab
, mode
, NULL
);
2558 set_optab_libfunc (smul_optab
, mode
, NULL
);
2559 set_optab_libfunc (neg_optab
, mode
, NULL
);
2560 set_optab_libfunc (sub_optab
, mode
, NULL
);
2563 set_optab_libfunc (eq_optab
, mode
, NULL
);
2564 set_optab_libfunc (ne_optab
, mode
, NULL
);
2565 set_optab_libfunc (lt_optab
, mode
, NULL
);
2566 set_optab_libfunc (le_optab
, mode
, NULL
);
2567 set_optab_libfunc (ge_optab
, mode
, NULL
);
2568 set_optab_libfunc (gt_optab
, mode
, NULL
);
2569 set_optab_libfunc (unord_optab
, mode
, NULL
);
2572 /* Set up library functions unique to ARM. */
2574 arm_init_libfuncs (void)
2576 machine_mode mode_iter
;
2578 /* For Linux, we have access to kernel support for atomic operations. */
2579 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2580 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2582 /* There are no special library functions unless we are using the
2587 /* The functions below are described in Section 4 of the "Run-Time
2588 ABI for the ARM architecture", Version 1.0. */
2590 /* Double-precision floating-point arithmetic. Table 2. */
2591 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2592 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2593 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2594 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2595 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2597 /* Double-precision comparisons. Table 3. */
2598 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2599 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2600 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2601 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2602 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2603 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2604 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2606 /* Single-precision floating-point arithmetic. Table 4. */
2607 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2608 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2609 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2610 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2611 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2613 /* Single-precision comparisons. Table 5. */
2614 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2615 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2616 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2617 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2618 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2619 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2620 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2622 /* Floating-point to integer conversions. Table 6. */
2623 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2624 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2625 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2626 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2627 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2628 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2629 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2630 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2632 /* Conversions between floating types. Table 7. */
2633 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2634 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2636 /* Integer to floating-point conversions. Table 8. */
2637 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2638 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2639 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2640 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2641 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2642 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2643 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2644 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2646 /* Long long. Table 9. */
2647 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2648 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2649 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2650 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2651 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2652 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2653 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2654 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2656 /* Integer (32/32->32) division. \S 4.3.1. */
2657 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2658 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2660 /* The divmod functions are designed so that they can be used for
2661 plain division, even though they return both the quotient and the
2662 remainder. The quotient is returned in the usual location (i.e.,
2663 r0 for SImode, {r0, r1} for DImode), just as would be expected
2664 for an ordinary division routine. Because the AAPCS calling
2665 conventions specify that all of { r0, r1, r2, r3 } are
2666 callee-saved registers, there is no need to tell the compiler
2667 explicitly that those registers are clobbered by these
2669 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2670 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2672 /* For SImode division the ABI provides div-without-mod routines,
2673 which are faster. */
2674 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2675 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2677 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2678 divmod libcalls instead. */
2679 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2680 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2681 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2682 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2684 /* Half-precision float operations. The compiler handles all operations
2685 with NULL libfuncs by converting the SFmode. */
2686 switch (arm_fp16_format
)
2688 case ARM_FP16_FORMAT_IEEE
:
2689 case ARM_FP16_FORMAT_ALTERNATIVE
:
2692 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2693 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2695 : "__gnu_f2h_alternative"));
2696 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2697 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2699 : "__gnu_h2f_alternative"));
2701 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2702 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2704 : "__gnu_d2h_alternative"));
2706 arm_block_arith_comp_libfuncs_for_mode (HFmode
);
2713 /* For all possible libcalls in BFmode, record NULL. */
2714 FOR_EACH_MODE_IN_CLASS (mode_iter
, MODE_FLOAT
)
2716 set_conv_libfunc (trunc_optab
, BFmode
, mode_iter
, NULL
);
2717 set_conv_libfunc (trunc_optab
, mode_iter
, BFmode
, NULL
);
2718 set_conv_libfunc (sext_optab
, mode_iter
, BFmode
, NULL
);
2719 set_conv_libfunc (sext_optab
, BFmode
, mode_iter
, NULL
);
2721 arm_block_arith_comp_libfuncs_for_mode (BFmode
);
2723 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2725 const arm_fixed_mode_set fixed_arith_modes
[] =
2728 { E_UQQmode
, "uqq" },
2730 { E_UHQmode
, "uhq" },
2732 { E_USQmode
, "usq" },
2734 { E_UDQmode
, "udq" },
2736 { E_UTQmode
, "utq" },
2738 { E_UHAmode
, "uha" },
2740 { E_USAmode
, "usa" },
2742 { E_UDAmode
, "uda" },
2744 { E_UTAmode
, "uta" }
2746 const arm_fixed_mode_set fixed_conv_modes
[] =
2749 { E_UQQmode
, "uqq" },
2751 { E_UHQmode
, "uhq" },
2753 { E_USQmode
, "usq" },
2755 { E_UDQmode
, "udq" },
2757 { E_UTQmode
, "utq" },
2759 { E_UHAmode
, "uha" },
2761 { E_USAmode
, "usa" },
2763 { E_UDAmode
, "uda" },
2765 { E_UTAmode
, "uta" },
2776 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2778 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2779 "add", fixed_arith_modes
[i
].name
, 3);
2780 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2781 "ssadd", fixed_arith_modes
[i
].name
, 3);
2782 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2783 "usadd", fixed_arith_modes
[i
].name
, 3);
2784 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2785 "sub", fixed_arith_modes
[i
].name
, 3);
2786 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2787 "sssub", fixed_arith_modes
[i
].name
, 3);
2788 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2789 "ussub", fixed_arith_modes
[i
].name
, 3);
2790 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2791 "mul", fixed_arith_modes
[i
].name
, 3);
2792 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2793 "ssmul", fixed_arith_modes
[i
].name
, 3);
2794 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2795 "usmul", fixed_arith_modes
[i
].name
, 3);
2796 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2797 "div", fixed_arith_modes
[i
].name
, 3);
2798 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2799 "udiv", fixed_arith_modes
[i
].name
, 3);
2800 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2801 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2802 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2803 "usdiv", fixed_arith_modes
[i
].name
, 3);
2804 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2805 "neg", fixed_arith_modes
[i
].name
, 2);
2806 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2807 "ssneg", fixed_arith_modes
[i
].name
, 2);
2808 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2809 "usneg", fixed_arith_modes
[i
].name
, 2);
2810 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2811 "ashl", fixed_arith_modes
[i
].name
, 3);
2812 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2813 "ashr", fixed_arith_modes
[i
].name
, 3);
2814 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2815 "lshr", fixed_arith_modes
[i
].name
, 3);
2816 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2817 "ssashl", fixed_arith_modes
[i
].name
, 3);
2818 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2819 "usashl", fixed_arith_modes
[i
].name
, 3);
2820 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2821 "cmp", fixed_arith_modes
[i
].name
, 2);
2824 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2825 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2828 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2829 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2832 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2833 fixed_conv_modes
[j
].mode
, "fract",
2834 fixed_conv_modes
[i
].name
,
2835 fixed_conv_modes
[j
].name
);
2836 arm_set_fixed_conv_libfunc (satfract_optab
,
2837 fixed_conv_modes
[i
].mode
,
2838 fixed_conv_modes
[j
].mode
, "satfract",
2839 fixed_conv_modes
[i
].name
,
2840 fixed_conv_modes
[j
].name
);
2841 arm_set_fixed_conv_libfunc (fractuns_optab
,
2842 fixed_conv_modes
[i
].mode
,
2843 fixed_conv_modes
[j
].mode
, "fractuns",
2844 fixed_conv_modes
[i
].name
,
2845 fixed_conv_modes
[j
].name
);
2846 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2847 fixed_conv_modes
[i
].mode
,
2848 fixed_conv_modes
[j
].mode
, "satfractuns",
2849 fixed_conv_modes
[i
].name
,
2850 fixed_conv_modes
[j
].name
);
2854 if (TARGET_AAPCS_BASED
)
2855 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2857 speculation_barrier_libfunc
= init_one_libfunc ("__speculation_barrier");
2860 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2862 arm_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
2864 gcall
*stmt
= as_a
<gcall
*> (gsi_stmt (*gsi
));
2865 tree fndecl
= gimple_call_fndecl (stmt
);
2866 unsigned int code
= DECL_MD_FUNCTION_CODE (fndecl
);
2867 unsigned int subcode
= code
>> ARM_BUILTIN_SHIFT
;
2868 gimple
*new_stmt
= NULL
;
2869 switch (code
& ARM_BUILTIN_CLASS
)
2871 case ARM_BUILTIN_GENERAL
:
2873 case ARM_BUILTIN_MVE
:
2874 new_stmt
= arm_mve::gimple_fold_builtin (subcode
, stmt
);
2879 gsi_replace (gsi
, new_stmt
, true);
2883 /* On AAPCS systems, this is the "struct __va_list". */
2884 static GTY(()) tree va_list_type
;
2886 /* Return the type to use as __builtin_va_list. */
2888 arm_build_builtin_va_list (void)
2893 if (!TARGET_AAPCS_BASED
)
2894 return std_build_builtin_va_list ();
2896 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2904 The C Library ABI further reinforces this definition in \S
2907 We must follow this definition exactly. The structure tag
2908 name is visible in C++ mangled names, and thus forms a part
2909 of the ABI. The field name may be used by people who
2910 #include <stdarg.h>. */
2911 /* Create the type. */
2912 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2913 /* Give it the required name. */
2914 va_list_name
= build_decl (BUILTINS_LOCATION
,
2916 get_identifier ("__va_list"),
2918 DECL_ARTIFICIAL (va_list_name
) = 1;
2919 TYPE_NAME (va_list_type
) = va_list_name
;
2920 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2921 /* Create the __ap field. */
2922 ap_field
= build_decl (BUILTINS_LOCATION
,
2924 get_identifier ("__ap"),
2926 DECL_ARTIFICIAL (ap_field
) = 1;
2927 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2928 TYPE_FIELDS (va_list_type
) = ap_field
;
2929 /* Compute its layout. */
2930 layout_type (va_list_type
);
2932 return va_list_type
;
2935 /* Return an expression of type "void *" pointing to the next
2936 available argument in a variable-argument list. VALIST is the
2937 user-level va_list object, of type __builtin_va_list. */
2939 arm_extract_valist_ptr (tree valist
)
2941 if (TREE_TYPE (valist
) == error_mark_node
)
2942 return error_mark_node
;
2944 /* On an AAPCS target, the pointer is stored within "struct
2946 if (TARGET_AAPCS_BASED
)
2948 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2949 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2950 valist
, ap_field
, NULL_TREE
);
2956 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2958 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2960 valist
= arm_extract_valist_ptr (valist
);
2961 std_expand_builtin_va_start (valist
, nextarg
);
2964 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2966 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2969 valist
= arm_extract_valist_ptr (valist
);
2970 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2973 /* Check any incompatible options that the user has specified.  */
2975 arm_option_check_internal (struct gcc_options *opts)
/* Cache the target flag word being checked; OPTS may be the global
   options or a target-attribute override set.  */
2977 int flags = opts->x_target_flags;
2979 /* iWMMXt and NEON are incompatible.  */
/* NOTE(review): extraction gap — the first half of this condition
   (orig. 2980, presumably a TARGET_IWMMXT test) is missing here;
   confirm against upstream before relying on this text.  */
2981 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2982 error ("iWMMXt and NEON are incompatible");
2984 /* Make sure that the processor choice does not conflict with any of the
2985 other command line choices.  */
2986 if (TARGET_ARM_P (flags)
2987 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2988 error ("target CPU does not support ARM mode");
2990 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
2991 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2992 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2994 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2995 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2997 /* If this target is normally configured to use APCS frames, warn if they
2998 are turned off and debugging is turned on.  */
2999 if (TARGET_ARM_P (flags)
3000 && write_symbols != NO_DEBUG
3001 && !TARGET_APCS_FRAME
3002 && (TARGET_DEFAULT & MASK_APCS_FRAME))
/* NOTE(review): the string continuation of this diagnostic (orig. 3004)
   is missing from this view.  */
3003 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
3006 /* iWMMXt unsupported under Thumb mode.  */
3007 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
3008 error ("iWMMXt unsupported under Thumb mode");
/* cp15 thread-pointer access needs full ARM/Thumb-2 MRC encodings.  */
3010 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3011 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3013 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3015 error ("RTP PIC is incompatible with Thumb");
/* Both -mpure-code and -mslow-flash-data forbid literal pools, so they
   share one set of restrictions; FLAG names whichever was given.  */
3019 if (target_pure_code || target_slow_flash_data)
3021 const char *flag = (target_pure_code ? "-mpure-code" :
3022 "-mslow-flash-data");
3023 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3025 /* We only support -mslow-flash-data on M-profile targets with
3027 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3028 error ("%s only supports non-pic code on M-profile targets with the "
3029 "MOVT instruction", flag);
3031 /* We only support -mpure-code on M-profile targets.  */
3032 if (target_pure_code && common_unsupported_modes)
3033 error ("%s only supports non-pic code on M-profile targets", flag);
3035 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3036 -mword-relocations forbids relocation of MOVT/MOVW.  */
3037 if (target_word_relocations)
3038 error ("%s incompatible with %<-mword-relocations%>", flag);
3042 /* Recompute the global settings depending on target attribute options.  */
/* NOTE(review): extraction gap — the return type (orig. 3044) and the
   leading TARGET_THUMB1 condition (orig. 3049-3050) are missing here.  */
3045 arm_option_params_internal (void)
3047 /* If we are not using the default (ARM mode) section anchor offset
3048 ranges, then set the correct ranges now.  */
3051 /* Thumb-1 LDR instructions cannot have negative offsets.
3052 Permissible positive offset ranges are 5-bit (for byte loads),
3053 6-bit (for halfword loads), or 7-bit (for word loads).
3054 Empirical results suggest a 7-bit anchor range gives the best
3055 overall code size.  */
3056 targetm.min_anchor_offset = 0;
3057 targetm.max_anchor_offset = 127;
3059 else if (TARGET_THUMB2)
3061 /* The minimum is set such that the total size of the block
3062 for a particular anchor is 248 + 1 + 4095 bytes, which is
3063 divisible by eight, ensuring natural spacing of anchors.  */
3064 targetm.min_anchor_offset = -248;
3065 targetm.max_anchor_offset = 4095;
/* ARM-state default anchor range.  */
3069 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3070 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3073 /* Increase the number of conditional instructions with -Os.  */
3074 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3076 /* For THUMB2, we limit the conditional sequence to one IT block.  */
3078 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
/* Select the inline-asm fixup hook matching the instruction set.  */
3081 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3083 targetm.md_asm_adjust = arm_md_asm_adjust;
3086 /* True if -mflip-thumb should next add an attribute for the default
3087 mode, false if it should next add an attribute for the opposite mode.  */
3088 static GTY(()) bool thumb_flipper;
3090 /* Options after initial target override.  */
/* Saved optimization node consulted by arm_option_override_internal so
   per-attribute recompilation can restore the user's original flags.  */
3091 static GTY(()) tree init_optimize;
/* Shared worker for targetm.override_options_after_change: re-derive
   options that depend on other options.  OPTS/OPTS_SET are the option
   set being adjusted and its explicit-set mask.
   NOTE(review): the return type line (orig. 3093) and braces are missing
   from this extraction.  */
3094 arm_override_options_after_change_1 (struct gcc_options *opts,
3095 struct gcc_options *opts_set)
3097 /* -falign-functions without argument: supply one.  */
/* Thumb code optimized for size aligns functions to 2 bytes, otherwise
   4; only applied when the user gave no explicit alignment string.  */
3098 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3099 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3100 && opts->x_optimize_size ? "2" : "4";
3103 /* Implement targetm.override_options_after_change.  */
/* Thin wrapper applying the worker above to the global option set.
   NOTE(review): return type/braces missing in this extraction.  */
3106 arm_override_options_after_change (void)
3108 arm_override_options_after_change_1 (&global_options, &global_options_set);
3111 /* Implement TARGET_OPTION_RESTORE.  */
/* Rebuild the active target description from a saved cl_target_option
   (PTR), then refresh the derived architecture globals.  The opts /
   opts_set parameters are unused, hence commented out.
   NOTE(review): return type/braces missing in this extraction.  */
3113 arm_option_restore (struct gcc_options */* opts */,
3114 struct gcc_options */* opts_set */,
3115 struct cl_target_option *ptr)
3117 arm_configure_build_target (&arm_active_target, ptr, false);
3118 arm_option_reconfigure_globals ();
3121 /* Reset options between modes that the user has specified.  */
/* NOTE(review): this extraction has dropped scattered lines (return type,
   braces, some else-arms); gaps are flagged below where evident.  */
3123 arm_option_override_internal (struct gcc_options *opts,
3124 struct gcc_options *opts_set)
3126 arm_override_options_after_change_1 (opts, opts_set);
/* Interworking needs Thumb in the ISA; silently drop it otherwise.  */
3128 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3130 /* The default is to enable interworking, so this warning message would
3131 be confusing to users who have just compiled with
3132 eg, -march=armv4.  */
3133 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3134 opts->x_target_flags &= ~MASK_INTERWORK;
3137 if (TARGET_THUMB_P (opts->x_target_flags)
3138 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3140 warning (0, "target CPU does not support THUMB instructions");
3141 opts->x_target_flags &= ~MASK_THUMB;
3144 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3146 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3147 opts->x_target_flags &= ~MASK_APCS_FRAME;
3150 /* Callee super interworking implies thumb interworking.  Adding
3151 this to the flags here simplifies the logic elsewhere.  */
3152 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3153 opts->x_target_flags |= MASK_INTERWORK;
3155 /* need to remember initial values so combinaisons of options like
3156 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3157 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
/* Default -mrestrict-it from the architecture when not user-set.  */
3159 if (! opts_set->x_arm_restrict_it)
3160 opts->x_arm_restrict_it = arm_arch8;
3162 /* ARM execution state and M profile don't have [restrict] IT.  */
3163 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3164 opts->x_arm_restrict_it = 0;
3166 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
3167 if (!opts_set->x_arm_restrict_it
3168 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3169 opts->x_arm_restrict_it = 0;
3171 /* Enable -munaligned-access by default for
3172 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3173 i.e. Thumb2 and ARM state only.
3174 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3175 - ARMv8 architecture-base processors.
3177 Disable -munaligned-access by default for
3178 - all pre-ARMv6 architecture-based processors
3179 - ARMv6-M architecture-based processors
3180 - ARMv8-M Baseline processors.  */
3182 if (! opts_set->x_unaligned_access)
3184 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3185 && arm_arch6 && (arm_arch_notm || arm_arch7));
3187 else if (opts->x_unaligned_access == 1
3188 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3190 warning (0, "target CPU does not support unaligned accesses");
3191 opts->x_unaligned_access = 0;
3194 /* Don't warn since it's on by default in -O2.  */
/* Thumb-1: the scheduler is disabled (see epilogue comment below);
   otherwise restore the user's original setting from INIT_OPTIMIZE.  */
3195 if (TARGET_THUMB1_P (opts->x_target_flags))
3196 opts->x_flag_schedule_insns = 0;
3198 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3200 /* Disable shrink-wrap when optimizing function for size, since it tends to
3201 generate additional returns.  */
3202 if (optimize_function_for_size_p (cfun)
3203 && TARGET_THUMB2_P (opts->x_target_flags))
3204 opts->x_flag_shrink_wrap = false;
3206 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3208 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3209 - epilogue_insns - does not accurately model the corresponding insns
3210 emitted in the asm file.  In particular, see the comment in thumb_exit
3211 'Find out how many of the (return) argument registers we can corrupt'.
3212 As a consequence, the epilogue may clobber registers without fipa-ra
3213 finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3214 TODO: Accurately model clobbers for epilogue_insns and reenable
3216 if (TARGET_THUMB1_P (opts->x_target_flags))
3217 opts->x_flag_ipa_ra = 0;
3219 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3221 /* Thumb2 inline assembly code should always use unified syntax.
3222 This will apply to ARM and Thumb1 eventually.  */
3223 if (TARGET_THUMB2_P (opts->x_target_flags))
3224 opts->x_inline_asm_unified = true;
/* A global stack-protector guard has no per-task offset, so the two
   options cannot be combined.  */
3226 if (arm_stack_protector_guard == SSP_GLOBAL
3227 && opts->x_arm_stack_protector_guard_offset_str)
3229 error ("incompatible options %<-mstack-protector-guard=global%> and "
3230 "%<-mstack-protector-guard-offset=%s%>",
3231 arm_stack_protector_guard_offset_str);
3234 if (opts->x_arm_stack_protector_guard_offset_str)
/* Parse the offset with strtol; reject empty input, trailing junk,
   or range errors reported through errno.
   NOTE(review): the errno = 0 reset and 'char *end' declaration
   (orig. ~3236-3238) are missing from this extraction.  */
3237 const char *str = arm_stack_protector_guard_offset_str;
3239 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3240 if (!*str || *end || errno)
3241 error ("%qs is not a valid offset in %qs", str,
3242 "-mstack-protector-guard-offset=");
3243 arm_stack_protector_guard_offset = offs;
/* PAC return-address signing needs Armv8-M Mainline and is mutually
   exclusive with the TPCS frame layout.  */
3246 if (arm_current_function_pac_enabled_p ())
3248 if (!arm_arch8m_main)
3249 error ("This architecture does not support branch protection "
3251 if (TARGET_TPCS_FRAME)
3252 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3255 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3256 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
/* Cached feature-bit masks, allocated once in arm_option_override and
   used by arm_configure_build_target:
   - isa_all_fpubits_internal: every bit settable via -mfpu;
   - isa_all_fpbits: all FP-related bits (incl. MVE float);
   - isa_quirkbits: quirk bits ignored when diffing -mcpu vs -march.  */
3260 static sbitmap isa_all_fpubits_internal;
3261 static sbitmap isa_all_fpbits;
3262 static sbitmap isa_quirkbits;
3264 /* Configure a build target TARGET from the user-specified options OPTS and
3265 OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3266 architecture have been specified, but the two are not identical.  */
/* NOTE(review): this extraction has dropped scattered lines (return type,
   braces, some arguments and else-arms); gaps flagged below.  */
3268 arm_configure_build_target (struct arm_build_target *target,
3269 struct cl_target_option *opts,
3270 bool warn_compatible)
3272 const cpu_option *arm_selected_tune = NULL;
3273 const arch_option *arm_selected_arch = NULL;
3274 const cpu_option *arm_selected_cpu = NULL;
3275 const arm_fpu_desc *arm_selected_fpu = NULL;
3276 const char *tune_opts = NULL;
3277 const char *arch_opts = NULL;
3278 const char *cpu_opts = NULL;
/* Start from a clean target description.  */
3280 bitmap_clear (target->isa);
3281 target->core_name = NULL;
3282 target->arch_name = NULL;
/* Parse -march; any '+feature' suffixes are split off into arch_opts.  */
3284 if (opts->x_arm_arch_string)
3286 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3288 opts->x_arm_arch_string);
3289 arch_opts = strchr (opts->x_arm_arch_string, '+');
/* Parse -mcpu similarly; -mcpu also supplies the default tuning.  */
3292 if (opts->x_arm_cpu_string)
3294 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3295 opts->x_arm_cpu_string);
3296 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3297 arm_selected_tune = arm_selected_cpu;
3298 /* If taking the tuning from -mcpu, we don't need to rescan the
3299 options for tuning.  */
/* Parse -mtune; overrides any tuning taken from -mcpu.  */
3302 if (opts->x_arm_tune_string)
3304 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3305 opts->x_arm_tune_string);
3306 tune_opts = strchr (opts->x_arm_tune_string, '+');
/* Validate -mbranch-protection; only the A key is supported on ARM.  */
3309 if (opts->x_arm_branch_protection_string)
3311 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3313 if (aarch_ra_sign_key != AARCH_KEY_A)
3315 warning (0, "invalid key type for %<-mbranch-protection=%>");
3316 aarch_ra_sign_key = AARCH_KEY_A;
/* Case 1: -march given (possibly with -mcpu).  */
3320 if (arm_selected_arch)
3322 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3323 arm_parse_option_features (target->isa, &arm_selected_arch->common,
/* Both -march and -mcpu: diff the two feature sets to detect conflicts.  */
3326 if (arm_selected_cpu)
3328 auto_sbitmap cpu_isa (isa_num_bits);
3329 auto_sbitmap isa_delta (isa_num_bits);
3331 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3332 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3334 bitmap_xor (isa_delta, cpu_isa, target->isa);
3335 /* Ignore any bits that are quirk bits.  */
3336 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3337 /* If the user (or the default configuration) has specified a
3338 specific FPU, then ignore any bits that depend on the FPU
3339 configuration.  Do similarly if using the soft-float
3341 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3342 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3343 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
/* Non-empty delta means the named CPU does not implement the named
   architecture (modulo quirks/FPU bits): warn and let -march win.  */
3345 if (!bitmap_empty_p (isa_delta))
3347 if (warn_compatible)
3348 warning (0, "switch %<-mcpu=%s%> conflicts "
3349 "with switch %<-march=%s%>",
3350 opts->x_arm_cpu_string,
3351 opts->x_arm_arch_string);
3353 /* -march wins for code generation.
3354 -mcpu wins for default tuning.  */
3355 if (!arm_selected_tune)
3356 arm_selected_tune = arm_selected_cpu;
3358 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3359 target->arch_name = arm_selected_arch->common.name;
3363 /* Architecture and CPU are essentially the same.
3364 Prefer the CPU setting.  */
3365 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3366 target->core_name = arm_selected_cpu->common.name;
3367 /* Copy the CPU's capabilities, so that we inherit the
3368 appropriate extensions and quirks.  */
3369 bitmap_copy (target->isa, cpu_isa);
3374 /* Pick a CPU based on the architecture.  */
3375 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3376 target->arch_name = arm_selected_arch->common.name;
3377 /* Note: target->core_name is left unset in this path.  */
/* Case 2: only -mcpu given — derive the architecture from the CPU.  */
3380 else if (arm_selected_cpu)
3382 target->core_name = arm_selected_cpu->common.name;
3383 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3384 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3386 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3388 /* If the user did not specify a processor or architecture, choose
/* Case 3: neither given — start from the configured default CPU.  */
3392 const cpu_option *sel;
3393 auto_sbitmap sought_isa (isa_num_bits);
3394 bitmap_clear (sought_isa);
3395 auto_sbitmap default_isa (isa_num_bits);
3397 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3398 TARGET_CPU_DEFAULT);
3399 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3400 gcc_assert (arm_selected_cpu->common.name);
3402 /* RWE: All of the selection logic below (to the end of this
3403 'if' clause) looks somewhat suspect.  It appears to be mostly
3404 there to support forcing thumb support when the default CPU
3405 does not have thumb (somewhat dubious in terms of what the
3406 user might be expecting).  I think it should be removed once
3407 support for the pre-thumb era cores is removed.  */
3408 sel = arm_selected_cpu;
3409 arm_initialize_isa (default_isa, sel->common.isa_bits);
3410 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3413 /* Now check to see if the user has specified any command line
3414 switches that require certain abilities from the cpu.  */
3416 if (TARGET_INTERWORK || TARGET_THUMB)
3417 bitmap_set_bit (sought_isa, isa_bit_thumb);
3419 /* If there are such requirements and the default CPU does not
3420 satisfy them, we need to run over the complete list of
3421 cores looking for one that is satisfactory.  */
3422 if (!bitmap_empty_p (sought_isa)
3423 && !bitmap_subset_p (sought_isa, default_isa))
3425 auto_sbitmap candidate_isa (isa_num_bits);
3426 /* We're only interested in a CPU with at least the
3427 capabilities of the default CPU and the required
3428 additional features.  */
3429 bitmap_ior (default_isa, default_isa, sought_isa);
3431 /* Try to locate a CPU type that supports all of the abilities
3432 of the default CPU, plus the extra abilities requested by
3434 for (sel = all_cores; sel->common.name != NULL; sel++)
3436 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3437 /* An exact match?  */
3438 if (bitmap_equal_p (default_isa, candidate_isa))
/* No exact match found: fall through to the best-fit scan below.  */
3442 if (sel->common.name == NULL)
3444 unsigned current_bit_count = isa_num_bits;
3445 const cpu_option *best_fit = NULL;
3447 /* Ideally we would like to issue an error message here
3448 saying that it was not possible to find a CPU compatible
3449 with the default CPU, but which also supports the command
3450 line options specified by the programmer, and so they
3451 ought to use the -mcpu=<name> command line option to
3452 override the default CPU type.
3454 If we cannot find a CPU that has exactly the
3455 characteristics of the default CPU and the given
3456 command line options we scan the array again looking
3457 for a best match.  The best match must have at least
3458 the capabilities of the perfect match.  */
3459 for (sel = all_cores; sel->common.name != NULL; sel++)
3461 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3463 if (bitmap_subset_p (default_isa, candidate_isa))
/* Count the candidate's surplus bits; fewest surplus wins.
   NOTE(review): the 'unsigned count' declaration and the
   best_fit = sel assignment (orig. ~3465, 3473) are missing
   from this extraction.  */
3467 bitmap_and_compl (candidate_isa, candidate_isa,
3469 count = bitmap_popcount (candidate_isa);
3471 if (count < current_bit_count)
3474 current_bit_count = count;
3478 gcc_assert (best_fit);
3482 arm_selected_cpu = sel;
3485 /* Now we know the CPU, we can finally initialize the target
3487 target->core_name = arm_selected_cpu->common.name;
3488 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3489 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3491 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
/* By now every path must have resolved both a CPU and an arch.  */
3494 gcc_assert (arm_selected_cpu);
3495 gcc_assert (arm_selected_arch);
/* An explicit -mfpu overrides the FPU bits implied by CPU/arch.  */
3497 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3499 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3500 auto_sbitmap fpu_bits (isa_num_bits);
3502 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3503 /* This should clear out ALL bits relating to the FPU/simd
3504 extensions, to avoid potentially invalid combinations later on
3505 that we can't match.  At present we only clear out those bits
3506 that can be set by -mfpu.  This should be fixed in GCC-12.  */
3507 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3508 bitmap_ior (target->isa, target->isa, fpu_bits);
3511 /* If we have the soft-float ABI, clear any feature bits relating to use of
3512 floating-point operations.  They'll just confuse things later on.  */
3513 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3514 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3516 /* There may be implied bits which we still need to enable.  These are
3517 non-named features which are needed to complete other sets of features,
3518 but cannot be enabled from arm-cpus.in due to being shared between
3519 multiple fgroups.  Each entry in all_implied_fbits is of the form
3520 ante -> cons, meaning that if the feature "ante" is enabled, we should
3521 implicitly enable "cons".  */
3522 const struct fbit_implication *impl = all_implied_fbits;
/* NOTE(review): the loop header walking all_implied_fbits (orig. ~3523-3524)
   is missing from this extraction.  */
3525 if (bitmap_bit_p (target->isa, impl->ante))
3526 bitmap_set_bit (target->isa, impl->cons);
/* Resolve tuning: default to the selected CPU, else validate -mtune
   feature suffixes (features on -mtune do not affect codegen).  */
3530 if (!arm_selected_tune)
3531 arm_selected_tune = arm_selected_cpu;
3532 else /* Validate the features passed to -mtune.  */
3533 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3535 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3537 /* Finish initializing the target structure.  */
3538 if (!target->arch_name)
3539 target->arch_name = arm_selected_arch->common.name;
3540 target->arch_pp_name = arm_selected_arch->arch;
3541 target->base_arch = arm_selected_arch->base_arch;
3542 target->profile = arm_selected_arch->profile;
3544 target->tune_flags = tune_data->tune_flags;
3545 target->tune = tune_data->tune;
3546 target->tune_core = tune_data->scheduler;
3549 /* Fix up any incompatible options that the user has specified.  */
/* Implements TARGET_OPTION_OVERRIDE: allocates the shared feature-bit
   masks, builds the active target, then applies global option fixups.
   NOTE(review): this extraction has dropped scattered lines (return type,
   braces, several conditions/else-arms); gaps flagged where evident.  */
3551 arm_option_override (void)
3553 static const enum isa_feature fpu_bitlist_internal[]
3554 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3555 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
3556 static const enum isa_feature fp_bitlist[]
3557 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3558 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
3559 cl_target_option opts;
/* One-time allocation of the file-scope masks used when diffing and
   filtering feature sets in arm_configure_build_target.  */
3561 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3562 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3564 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3565 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3566 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3567 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3569 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
/* If no -mfpu was given, resolve the configured default FPU name.
   NOTE(review): the declarations of ok/fpu_index and the gcc_assert
   (orig. ~3572-3578) are missing from this extraction.  */
3571 if (!OPTION_SET_P (arm_fpu_index))
3576 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3579 arm_fpu_index = (enum fpu_type) fpu_index;
/* Build the active target from the current global options.  */
3582 cl_target_option_save (&opts, &global_options, &global_options_set);
3583 arm_configure_build_target (&arm_active_target, &opts, true);
3585 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3586 SUBTARGET_OVERRIDE_OPTIONS;
3589 /* Initialize boolean versions of the architectural flags, for use
3590 in the arm.md file and for enabling feature flags.  */
3591 arm_option_reconfigure_globals ();
3593 arm_tune = arm_active_target.tune_core;
3594 tune_flags = arm_active_target.tune_flags;
3595 current_tune = arm_active_target.tune;
3597 /* TBD: Dwarf info for apcs frame is not handled yet.  */
3598 if (TARGET_APCS_FRAME)
3599 flag_shrink_wrap = false;
3601 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3603 warning (0, "%<-mapcs-stack-check%> incompatible with "
3604 "%<-mno-apcs-frame%>");
3605 target_flags |= MASK_APCS_FRAME;
3608 if (TARGET_POKE_FUNCTION_NAME)
3609 target_flags |= MASK_APCS_FRAME;
3611 if (TARGET_APCS_REENT && flag_pic)
3612 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3614 if (TARGET_APCS_REENT)
3615 warning (0, "APCS reentrant code not supported.  Ignored");
3617 /* Set up some tuning parameters.  */
3618 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3619 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3620 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3621 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3622 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3623 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3625 /* For arm2/3 there is no need to do any scheduling if we are doing
3626 software floating-point.  */
3627 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3628 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3630 /* Override the default structure alignment for AAPCS ABI.  */
3631 if (!OPTION_SET_P (arm_structure_size_boundary))
3633 if (TARGET_AAPCS_BASED)
3634 arm_structure_size_boundary = 8;
/* Explicit -mstructure-size-boundary: deprecated, and only 8/32
   (plus 64 with doubleword alignment) are accepted.  */
3638 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3640 if (arm_structure_size_boundary != 8
3641 && arm_structure_size_boundary != 32
3642 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3644 if (ARM_DOUBLEWORD_ALIGN)
3646 "structure size boundary can only be set to 8, 32 or 64");
3648 warning (0, "structure size boundary can only be set to 8 or 32");
3649 arm_structure_size_boundary
3650 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
/* VxWorks RTP: data is not text-relative, so default to a single
   read-only PIC base register unless the user said otherwise.  */
3654 if (TARGET_VXWORKS_RTP)
3656 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3657 arm_pic_data_is_text_relative = 0;
3660 && !arm_pic_data_is_text_relative
3661 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3662 /* When text & data segments don't have a fixed displacement, the
3663 intended use is with a single, read only, pic base register.
3664 Unless the user explicitly requested not to do that, set
3666 target_flags |= MASK_SINGLE_PIC_BASE;
3668 /* If stack checking is disabled, we can use r10 as the PIC register,
3669 which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3670 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3672 if (TARGET_VXWORKS_RTP)
3673 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3674 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3677 if (flag_pic && TARGET_VXWORKS_RTP)
3678 arm_pic_register = 9;
3680 /* If in FDPIC mode then force arm_pic_register to be r9.  */
3683 arm_pic_register = FDPIC_REGNUM;
3685 sorry ("FDPIC mode is not supported in Thumb-1 mode");
/* Validate an explicit -mpic-register= choice.  */
3688 if (arm_pic_register_string != NULL)
3690 int pic_register = decode_reg_name (arm_pic_register_string);
3693 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3695 /* Prevent the user from choosing an obviously stupid PIC register.  */
3696 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3697 || pic_register == HARD_FRAME_POINTER_REGNUM
3698 || pic_register == STACK_POINTER_REGNUM
3699 || pic_register >= PC_REGNUM
3700 || (TARGET_VXWORKS_RTP
3701 && (unsigned int) pic_register != arm_pic_register))
3702 error ("unable to use %qs for PIC register", arm_pic_register_string);
3704 arm_pic_register = pic_register;
/* NOTE(review): the condition guarding this assignment (orig. ~3706-3707,
   presumably the literal-pool/word-relocation interaction) is missing
   from this extraction.  */
3708 target_word_relocations = 1;
3710 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
/* Value 2 means "unset by the user" for these tri-state quirk flags.  */
3711 if (fix_cm3_ldrd == 2)
3713 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3719 /* Enable fix_vlldm by default if required.  */
3722 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3728 /* Enable fix_aes by default if required.  */
3729 if (fix_aes_erratum_1742098 == 2)
3731 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3732 fix_aes_erratum_1742098 = 1;
3734 fix_aes_erratum_1742098 = 0;
3737 /* Hot/Cold partitioning is not currently supported, since we can't
3738 handle literal pool placement in that case.  */
3739 if (flag_reorder_blocks_and_partition)
3741 inform (input_location,
3742 "%<-freorder-blocks-and-partition%> not supported "
3743 "on this architecture");
3744 flag_reorder_blocks_and_partition = 0;
3745 flag_reorder_blocks = 1;
3749 /* Hoisting PIC address calculations more aggressively provides a small,
3750 but measurable, size reduction for PIC code.  Therefore, we decrease
3751 the bar for unrestricted expression hoisting to the cost of PIC address
3752 calculation, which is 2 instructions.  */
3753 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3754 param_gcse_unrestricted_cost, 2);
3756 /* ARM EABI defaults to strict volatile bitfields.  */
3757 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3758 && abi_version_at_least(2))
3759 flag_strict_volatile_bitfields = 1;
3761 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3762 have deemed it beneficial (signified by setting
3763 prefetch.num_slots to 1 or more).  */
3764 if (flag_prefetch_loop_arrays < 0
3767 && current_tune->prefetch.num_slots > 0)
3768 flag_prefetch_loop_arrays = 1;
3770 /* Set up parameters to be used in prefetching algorithm.  Do not
3771 override the defaults unless we are tuning for a core we have
3772 researched values for.  */
3773 if (current_tune->prefetch.num_slots > 0)
3774 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3775 param_simultaneous_prefetches,
3776 current_tune->prefetch.num_slots);
3777 if (current_tune->prefetch.l1_cache_line_size >= 0)
3778 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3779 param_l1_cache_line_size,
3780 current_tune->prefetch.l1_cache_line_size);
3781 if (current_tune->prefetch.l1_cache_line_size >= 0)
3783 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3784 param_destruct_interfere_size,
3785 current_tune->prefetch.l1_cache_line_size);
3786 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3787 param_construct_interfere_size,
3788 current_tune->prefetch.l1_cache_line_size);
3792 /* For a generic ARM target, JF Bastien proposed using 64 for both.  */
3793 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3795 /* More recent Cortex chips have a 64-byte cache line, but are marked
3796 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults.  */
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_destruct_interfere_size, 64);
3799 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3800 param_construct_interfere_size, 64);
3803 if (current_tune->prefetch.l1_cache_size >= 0)
3804 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3805 param_l1_cache_size,
3806 current_tune->prefetch.l1_cache_size);
3808 /* Look through ready list and all of queue for instructions
3809 relevant for L2 auto-prefetcher.  */
3810 int sched_autopref_queue_depth;
3812 switch (current_tune->sched_autopref)
3814 case tune_params::SCHED_AUTOPREF_OFF:
3815 sched_autopref_queue_depth = -1;
3818 case tune_params::SCHED_AUTOPREF_RANK:
3819 sched_autopref_queue_depth = 0;
3822 case tune_params::SCHED_AUTOPREF_FULL:
3823 sched_autopref_queue_depth = max_insn_queue_index + 1;
/* NOTE(review): the break statements and default: gcc_unreachable arm
   of this switch (orig. ~3816-3828) are missing from this extraction.  */
3830 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3831 param_sched_autopref_queue_depth,
3832 sched_autopref_queue_depth);
3834 /* Currently, for slow flash data, we just disable literal pools.  We also
3835 disable it for pure-code.  */
3836 if (target_slow_flash_data || target_pure_code)
3837 arm_disable_literal_pool = true;
3839 /* Disable scheduling fusion by default if it's not armv7 processor
3840 or doesn't prefer ldrd/strd.  */
3841 if (flag_schedule_fusion == 2
3842 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3843 flag_schedule_fusion = 0;
3845 /* Need to remember initial options before they are overriden.  */
3846 init_optimize = build_optimization_node (&global_options,
3847 &global_options_set);
/* Final sanity checks and per-mode overrides, then freeze the default
   target option node used by target attributes/pragmas.  */
3849 arm_options_perform_arch_sanity_checks ();
3850 arm_option_override_internal (&global_options, &global_options_set);
3851 arm_option_check_internal (&global_options);
3852 arm_option_params_internal ();
3854 /* Create the default target_options structure.  */
3855 target_option_default_node = target_option_current_node
3856 = build_target_option_node (&global_options, &global_options_set);
3858 /* Register global variables with the garbage collector.  */
3859 arm_add_gc_roots ();
3861 /* Init initial mode for testing.  */
3862 thumb_flipper = TARGET_THUMB;
3866 /* Reconfigure global status flags from the active_target.isa. */
3868 arm_option_reconfigure_globals (void)
3870 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3871 arm_base_arch
= arm_active_target
.base_arch
;
3873 /* Initialize boolean versions of the architectural flags, for use
3874 in the arm.md file. */
3875 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3876 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3877 arm_arch5t
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5t
);
3878 arm_arch5te
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5te
);
3879 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3880 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3881 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3882 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3883 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3884 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3885 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3886 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3887 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3888 arm_arch8_3
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_3
);
3889 arm_arch8_4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_4
);
3890 arm_arch8_1m_main
= bitmap_bit_p (arm_active_target
.isa
,
3891 isa_bit_armv8_1m_main
);
3892 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3893 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3894 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3895 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3896 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3897 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3898 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3899 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3900 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3901 arm_arch8m_main
= arm_arch7
&& arm_arch_cmse
;
3902 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3903 arm_arch_i8mm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_i8mm
);
3904 arm_arch_bf16
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_bf16
);
3906 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3909 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3910 error ("selected fp16 options are incompatible");
3911 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3915 arm_arch_cde_coproc
= 0;
3916 int cde_bits
[] = {isa_bit_cdecp0
, isa_bit_cdecp1
, isa_bit_cdecp2
,
3917 isa_bit_cdecp3
, isa_bit_cdecp4
, isa_bit_cdecp5
,
3918 isa_bit_cdecp6
, isa_bit_cdecp7
};
3919 for (int i
= 0, e
= ARRAY_SIZE (cde_bits
); i
< e
; i
++)
3921 int cde_bit
= bitmap_bit_p (arm_active_target
.isa
, cde_bits
[i
]);
3924 arm_arch_cde
|= cde_bit
;
3925 arm_arch_cde_coproc
|= arm_arch_cde_coproc_bits
[i
];
3929 /* And finally, set up some quirks. */
3930 arm_arch_no_volatile_ce
3931 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3932 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3933 isa_bit_quirk_armv6kz
);
3935 /* Use the cp15 method if it is available. */
3936 if (target_thread_pointer
== TP_AUTO
)
3938 if (arm_arch6k
&& !TARGET_THUMB1
)
3939 target_thread_pointer
= TP_TPIDRURO
;
3941 target_thread_pointer
= TP_SOFT
;
3944 if (!TARGET_HARD_TP
&& arm_stack_protector_guard
== SSP_TLSREG
)
3945 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3948 /* Perform some validation between the desired architecture and the rest of the
3951 arm_options_perform_arch_sanity_checks (void)
3953 /* V5T code we generate is completely interworking capable, so we turn off
3954 TARGET_INTERWORK here to avoid many tests later on. */
3956 /* XXX However, we must pass the right pre-processor defines to CPP
3957 or GLD can get confused. This is a hack. */
3958 if (TARGET_INTERWORK
)
3959 arm_cpp_interwork
= 1;
3962 target_flags
&= ~MASK_INTERWORK
;
3964 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3965 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3967 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3968 error ("iwmmxt abi requires an iwmmxt capable cpu");
3970 /* BPABI targets use linker tricks to allow interworking on cores
3971 without thumb support. */
3972 if (TARGET_INTERWORK
3974 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3976 warning (0, "target CPU does not support interworking" );
3977 target_flags
&= ~MASK_INTERWORK
;
3980 /* If soft-float is specified then don't use FPU. */
3981 if (TARGET_SOFT_FLOAT
)
3982 arm_fpu_attr
= FPU_NONE
;
3984 arm_fpu_attr
= FPU_VFP
;
3986 if (TARGET_AAPCS_BASED
)
3988 if (TARGET_CALLER_INTERWORKING
)
3989 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3991 if (TARGET_CALLEE_INTERWORKING
)
3992 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3995 /* __fp16 support currently assumes the core has ldrh. */
3996 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3997 sorry ("%<__fp16%> and no ldrh");
3999 if (use_cmse
&& !arm_arch_cmse
)
4000 error ("target CPU does not support ARMv8-M Security Extensions");
4002 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
4003 and ARMv8-M Baseline and Mainline do not allow such configuration. */
4004 if (use_cmse
&& TARGET_HARD_FLOAT
&& LAST_VFP_REGNUM
> LAST_LO_VFP_REGNUM
)
4005 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4008 if (TARGET_AAPCS_BASED
)
4010 if (arm_abi
== ARM_ABI_IWMMXT
)
4011 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
4012 else if (TARGET_HARD_FLOAT_ABI
)
4014 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
4015 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_vfpv2
)
4016 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_mve
))
4017 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4020 arm_pcs_default
= ARM_PCS_AAPCS
;
4024 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
4025 sorry ("%<-mfloat-abi=hard%> and VFP");
4027 if (arm_abi
== ARM_ABI_APCS
)
4028 arm_pcs_default
= ARM_PCS_APCS
;
4030 arm_pcs_default
= ARM_PCS_ATPCS
;
4034 /* Test whether a local function descriptor is canonical, i.e.,
4035 whether we can use GOTOFFFUNCDESC to compute the address of the
4038 arm_fdpic_local_funcdesc_p (rtx fnx
)
4041 enum symbol_visibility vis
;
4047 if (! SYMBOL_REF_LOCAL_P (fnx
))
4050 fn
= SYMBOL_REF_DECL (fnx
);
4055 vis
= DECL_VISIBILITY (fn
);
4057 if (vis
== VISIBILITY_PROTECTED
)
4058 /* Private function descriptors for protected functions are not
4059 canonical. Temporarily change the visibility to global so that
4060 we can ensure uniqueness of funcdesc pointers. */
4061 DECL_VISIBILITY (fn
) = VISIBILITY_DEFAULT
;
4063 ret
= default_binds_local_p_1 (fn
, flag_pic
);
4065 DECL_VISIBILITY (fn
) = vis
;
4071 arm_add_gc_roots (void)
4073 gcc_obstack_init(&minipool_obstack
);
4074 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
4077 /* A table of known ARM exception types.
4078 For use with the interrupt function attribute. */
4082 const char *const arg
;
4083 const unsigned long return_value
;
4087 static const isr_attribute_arg isr_attribute_args
[] =
4089 { "IRQ", ARM_FT_ISR
},
4090 { "irq", ARM_FT_ISR
},
4091 { "FIQ", ARM_FT_FIQ
},
4092 { "fiq", ARM_FT_FIQ
},
4093 { "ABORT", ARM_FT_ISR
},
4094 { "abort", ARM_FT_ISR
},
4095 { "UNDEF", ARM_FT_EXCEPTION
},
4096 { "undef", ARM_FT_EXCEPTION
},
4097 { "SWI", ARM_FT_EXCEPTION
},
4098 { "swi", ARM_FT_EXCEPTION
},
4099 { NULL
, ARM_FT_NORMAL
}
4102 /* Returns the (interrupt) function type of the current
4103 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4105 static unsigned long
4106 arm_isr_value (tree argument
)
4108 const isr_attribute_arg
* ptr
;
4112 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
4114 /* No argument - default to IRQ. */
4115 if (argument
== NULL_TREE
)
4118 /* Get the value of the argument. */
4119 if (TREE_VALUE (argument
) == NULL_TREE
4120 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
4121 return ARM_FT_UNKNOWN
;
4123 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
4125 /* Check it against the list of known arguments. */
4126 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4127 if (streq (arg
, ptr
->arg
))
4128 return ptr
->return_value
;
4130 /* An unrecognized interrupt type. */
4131 return ARM_FT_UNKNOWN
;
4134 /* Computes the type of the current function. */
4136 static unsigned long
4137 arm_compute_func_type (void)
4139 unsigned long type
= ARM_FT_UNKNOWN
;
4143 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
4145 /* Decide if the current function is volatile. Such functions
4146 never return, and many memory cycles can be saved by not storing
4147 register values that will never be needed again. This optimization
4148 was added to speed up context switching in a kernel application. */
4150 && (TREE_NOTHROW (current_function_decl
)
4151 || !(flag_unwind_tables
4153 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
4154 && TREE_THIS_VOLATILE (current_function_decl
))
4155 type
|= ARM_FT_VOLATILE
;
4157 if (cfun
->static_chain_decl
!= NULL
)
4158 type
|= ARM_FT_NESTED
;
4160 attr
= DECL_ATTRIBUTES (current_function_decl
);
4162 a
= lookup_attribute ("naked", attr
);
4164 type
|= ARM_FT_NAKED
;
4166 a
= lookup_attribute ("isr", attr
);
4168 a
= lookup_attribute ("interrupt", attr
);
4171 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
4173 type
|= arm_isr_value (TREE_VALUE (a
));
4175 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
4176 type
|= ARM_FT_CMSE_ENTRY
;
4181 /* Returns the type of the current function. */
4184 arm_current_func_type (void)
4186 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
4187 cfun
->machine
->func_type
= arm_compute_func_type ();
4189 return cfun
->machine
->func_type
;
4193 arm_allocate_stack_slots_for_args (void)
4195 /* Naked functions should not allocate stack slots for arguments. */
4196 return !IS_NAKED (arm_current_func_type ());
4200 arm_warn_func_return (tree decl
)
4202 /* Naked functions are implemented entirely in assembly, including the
4203 return sequence, so suppress warnings about this. */
4204 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
4208 /* Output assembler code for a block containing the constant parts
4209 of a trampoline, leaving space for the variable parts.
4211 On the ARM, (if r8 is the static chain regnum, and remembering that
4212 referencing pc adds an offset of 8) the trampoline looks like:
4215 .word static chain value
4216 .word function's address
4217 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4219 In FDPIC mode, the trampoline looks like:
4220 .word trampoline address
4221 .word trampoline GOT address
4222 ldr r12, [pc, #8] ; #4 for Arm mode
4223 ldr r9, [pc, #8] ; #4 for Arm mode
4224 ldr pc, [pc, #8] ; #4 for Arm mode
4225 .word static chain value
4227 .word function's address
4231 arm_asm_trampoline_template (FILE *f
)
4233 fprintf (f
, "\t.syntax unified\n");
4237 /* The first two words are a function descriptor pointing to the
4238 trampoline code just below. */
4240 fprintf (f
, "\t.arm\n");
4241 else if (TARGET_THUMB2
)
4242 fprintf (f
, "\t.thumb\n");
4244 /* Only ARM and Thumb-2 are supported. */
4247 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4248 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4249 /* Trampoline code which sets the static chain register but also
4250 PIC register before jumping into real code. */
4251 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4252 STATIC_CHAIN_REGNUM
, PC_REGNUM
,
4253 TARGET_THUMB2
? 8 : 4);
4254 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4255 PIC_OFFSET_TABLE_REGNUM
, PC_REGNUM
,
4256 TARGET_THUMB2
? 8 : 4);
4257 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4258 PC_REGNUM
, PC_REGNUM
,
4259 TARGET_THUMB2
? 8 : 4);
4260 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4262 else if (TARGET_ARM
)
4264 fprintf (f
, "\t.arm\n");
4265 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
4266 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
4268 else if (TARGET_THUMB2
)
4270 fprintf (f
, "\t.thumb\n");
4271 /* The Thumb-2 trampoline is similar to the arm implementation.
4272 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4273 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
4274 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
4275 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
4279 ASM_OUTPUT_ALIGN (f
, 2);
4280 fprintf (f
, "\t.code\t16\n");
4281 fprintf (f
, ".Ltrampoline_start:\n");
4282 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
4283 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4284 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
4285 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4286 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
4287 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
4289 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4290 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4293 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4296 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4298 rtx fnaddr
, mem
, a_tramp
;
4300 emit_block_move (m_tramp
, assemble_trampoline_template (),
4301 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
4305 rtx funcdesc
= XEXP (DECL_RTL (fndecl
), 0);
4306 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
4307 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
4308 /* The function start address is at offset 8, but in Thumb mode
4309 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4311 rtx trampoline_code_start
4312 = plus_constant (Pmode
, XEXP (m_tramp
, 0), TARGET_THUMB2
? 9 : 8);
4314 /* Write initial funcdesc which points to the trampoline. */
4315 mem
= adjust_address (m_tramp
, SImode
, 0);
4316 emit_move_insn (mem
, trampoline_code_start
);
4317 mem
= adjust_address (m_tramp
, SImode
, 4);
4318 emit_move_insn (mem
, gen_rtx_REG (Pmode
, PIC_OFFSET_TABLE_REGNUM
));
4319 /* Setup static chain. */
4320 mem
= adjust_address (m_tramp
, SImode
, 20);
4321 emit_move_insn (mem
, chain_value
);
4322 /* GOT + real function entry point. */
4323 mem
= adjust_address (m_tramp
, SImode
, 24);
4324 emit_move_insn (mem
, gotaddr
);
4325 mem
= adjust_address (m_tramp
, SImode
, 28);
4326 emit_move_insn (mem
, fnaddr
);
4330 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
4331 emit_move_insn (mem
, chain_value
);
4333 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
4334 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4335 emit_move_insn (mem
, fnaddr
);
4338 a_tramp
= XEXP (m_tramp
, 0);
4339 maybe_emit_call_builtin___clear_cache (a_tramp
,
4340 plus_constant (ptr_mode
,
4345 /* Thumb trampolines should be entered in thumb mode, so set
4346 the bottom bit of the address. */
4349 arm_trampoline_adjust_address (rtx addr
)
4351 /* For FDPIC don't fix trampoline address since it's a function
4352 descriptor and not a function address. */
4353 if (TARGET_THUMB
&& !TARGET_FDPIC
)
4354 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
4355 NULL
, 0, OPTAB_LIB_WIDEN
);
4359 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4360 includes call-clobbered registers too. If this is a leaf function
4361 we can just examine the registers used by the RTL, but otherwise we
4362 have to assume that whatever function is called might clobber
4363 anything, and so we have to save all the call-clobbered registers
4365 static inline bool reg_needs_saving_p (unsigned reg
)
4367 unsigned long func_type
= arm_current_func_type ();
4369 if (IS_INTERRUPT (func_type
))
4370 if (df_regs_ever_live_p (reg
)
4371 /* Save call-clobbered core registers. */
4372 || (! crtl
->is_leaf
&& call_used_or_fixed_reg_p (reg
) && reg
< FIRST_VFP_REGNUM
))
4377 if (!df_regs_ever_live_p (reg
)
4378 || call_used_or_fixed_reg_p (reg
))
4384 /* Return 1 if it is possible to return using a single instruction.
4385 If SIBLING is non-null, this is a test for a return before a sibling
4386 call. SIBLING is the call insn, so we can examine its register usage. */
4389 use_return_insn (int iscond
, rtx sibling
)
4392 unsigned int func_type
;
4393 unsigned long saved_int_regs
;
4394 unsigned HOST_WIDE_INT stack_adjust
;
4395 arm_stack_offsets
*offsets
;
4397 /* Never use a return instruction before reload has run. */
4398 if (!reload_completed
)
4401 /* Never use a return instruction when return address signing
4402 mechanism is enabled as it requires more than one
4404 if (arm_current_function_pac_enabled_p ())
4407 func_type
= arm_current_func_type ();
4409 /* Naked, volatile and stack alignment functions need special
4411 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
4414 /* So do interrupt functions that use the frame pointer and Thumb
4415 interrupt functions. */
4416 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
4419 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
4420 && !optimize_function_for_size_p (cfun
))
4423 offsets
= arm_get_frame_offsets ();
4424 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
4426 /* As do variadic functions. */
4427 if (crtl
->args
.pretend_args_size
4428 || cfun
->machine
->uses_anonymous_args
4429 /* Or if the function calls __builtin_eh_return () */
4430 || crtl
->calls_eh_return
4431 /* Or if the function calls alloca */
4432 || cfun
->calls_alloca
4433 /* Or if there is a stack adjustment. However, if the stack pointer
4434 is saved on the stack, we can use a pre-incrementing stack load. */
4435 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
4436 && stack_adjust
== 4))
4437 /* Or if the static chain register was saved above the frame, under the
4438 assumption that the stack pointer isn't saved on the stack. */
4439 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
4440 && arm_compute_static_chain_stack_bytes() != 0))
4443 saved_int_regs
= offsets
->saved_regs_mask
;
4445 /* Unfortunately, the insn
4447 ldmib sp, {..., sp, ...}
4449 triggers a bug on most SA-110 based devices, such that the stack
4450 pointer won't be correctly restored if the instruction takes a
4451 page fault. We work around this problem by popping r3 along with
4452 the other registers, since that is never slower than executing
4453 another instruction.
4455 We test for !arm_arch5t here, because code for any architecture
4456 less than this could potentially be run on one of the buggy
4458 if (stack_adjust
== 4 && !arm_arch5t
&& TARGET_ARM
)
4460 /* Validate that r3 is a call-clobbered register (always true in
4461 the default abi) ... */
4462 if (!call_used_or_fixed_reg_p (3))
4465 /* ... that it isn't being used for a return value ... */
4466 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
4469 /* ... or for a tail-call argument ... */
4472 gcc_assert (CALL_P (sibling
));
4474 if (find_regno_fusage (sibling
, USE
, 3))
4478 /* ... and that there are no call-saved registers in r0-r2
4479 (always true in the default ABI). */
4480 if (saved_int_regs
& 0x7)
4484 /* Can't be done if interworking with Thumb, and any registers have been
4486 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4489 /* On StrongARM, conditional returns are expensive if they aren't
4490 taken and multiple registers have been stacked. */
4491 if (iscond
&& arm_tune_strongarm
)
4493 /* Conditional return when just the LR is stored is a simple
4494 conditional-load instruction, that's not expensive. */
4495 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
4499 && arm_pic_register
!= INVALID_REGNUM
4500 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4504 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4505 several instructions if anything needs to be popped. Armv8.1-M Mainline
4506 also needs several instructions to save and restore FP context. */
4507 if (IS_CMSE_ENTRY (func_type
) && (saved_int_regs
|| TARGET_HAVE_FPCXT_CMSE
))
4510 /* If there are saved registers but the LR isn't saved, then we need
4511 two instructions for the return. */
4512 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4515 /* Can't be done if any of the VFP regs are pushed,
4516 since this also requires an insn. */
4517 if (TARGET_VFP_BASE
)
4518 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4519 if (reg_needs_saving_p (regno
))
4522 if (TARGET_REALLY_IWMMXT
)
4523 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4524 if (reg_needs_saving_p (regno
))
4530 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4531 shrink-wrapping if possible. This is the case if we need to emit a
4532 prologue, which we can test by looking at the offsets. */
4534 use_simple_return_p (void)
4536 arm_stack_offsets
*offsets
;
4538 /* Note this function can be called before or after reload. */
4539 if (!reload_completed
)
4540 arm_compute_frame_layout ();
4542 offsets
= arm_get_frame_offsets ();
4543 return offsets
->outgoing_args
!= 0;
4546 /* Return TRUE if int I is a valid immediate ARM constant. */
4549 const_ok_for_arm (HOST_WIDE_INT i
)
4553 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4554 be all zero, or all one. */
4555 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4556 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4557 != ((~(unsigned HOST_WIDE_INT
) 0)
4558 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4561 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4563 /* Fast return for 0 and small values. We must do this for zero, since
4564 the code below can't handle that one case. */
4565 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4568 /* Get the number of trailing zeros. */
4569 lowbit
= ffs((int) i
) - 1;
4571 /* Only even shifts are allowed in ARM mode so round down to the
4572 nearest even number. */
4576 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4581 /* Allow rotated constants in ARM mode. */
4583 && ((i
& ~0xc000003f) == 0
4584 || (i
& ~0xf000000f) == 0
4585 || (i
& ~0xfc000003) == 0))
4588 else if (TARGET_THUMB2
)
4592 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4595 if (i
== v
|| i
== (v
| (v
<< 8)))
4598 /* Allow repeated pattern 0xXY00XY00. */
4604 else if (TARGET_HAVE_MOVT
)
4606 /* Thumb-1 Targets with MOVT. */
4616 /* Return true if I is a valid constant for the operation CODE. */
4618 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4620 if (const_ok_for_arm (i
))
4626 /* See if we can use movw. */
4627 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4630 /* Otherwise, try mvn. */
4631 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4634 /* See if we can use addw or subw. */
4636 && ((i
& 0xfffff000) == 0
4637 || ((-i
) & 0xfffff000) == 0))
4658 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4660 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4666 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4670 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4677 /* Return true if I is a valid di mode constant for the operation CODE. */
4679 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4681 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4682 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4683 rtx hi
= GEN_INT (hi_val
);
4684 rtx lo
= GEN_INT (lo_val
);
4694 return const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF
4695 || const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF;
4697 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4704 /* Emit a sequence of insns to handle a large constant.
4705 CODE is the code of the operation required, it can be any of SET, PLUS,
4706 IOR, AND, XOR, MINUS;
4707 MODE is the mode in which the operation is being performed;
4708 VAL is the integer to operate on;
4709 SOURCE is the other operand (a register, or a null-pointer for SET);
4710 SUBTARGETS means it is safe to create scratch registers if that will
4711 either produce a simpler sequence, or we will want to cse the values.
4712 Return value is the number of insns emitted. */
4714 /* ??? Tweak this for thumb2. */
4716 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4717 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4721 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4722 cond
= COND_EXEC_TEST (PATTERN (insn
));
4726 if (subtargets
|| code
== SET
4727 || (REG_P (target
) && REG_P (source
)
4728 && REGNO (target
) != REGNO (source
)))
4730 /* After arm_reorg has been called, we can't fix up expensive
4731 constants by pushing them into memory so we must synthesize
4732 them in-line, regardless of the cost. This is only likely to
4733 be more costly on chips that have load delay slots and we are
4734 compiling without running the scheduler (so no splitting
4735 occurred before the final instruction emission).
4737 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4739 if (!cfun
->machine
->after_arm_reorg
4741 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4743 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4748 /* Currently SET is the only monadic value for CODE, all
4749 the rest are diadic. */
4750 if (TARGET_USE_MOVT
)
4751 arm_emit_movpair (target
, GEN_INT (val
));
4753 emit_set_insn (target
, GEN_INT (val
));
4759 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4761 if (TARGET_USE_MOVT
)
4762 arm_emit_movpair (temp
, GEN_INT (val
));
4764 emit_set_insn (temp
, GEN_INT (val
));
4766 /* For MINUS, the value is subtracted from, since we never
4767 have subtraction of a constant. */
4769 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4771 emit_set_insn (target
,
4772 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4778 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4782 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4783 ARM/THUMB2 immediates, and add up to VAL.
4784 Thr function return value gives the number of insns required. */
4786 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4787 struct four_ints
*return_sequence
)
4789 int best_consecutive_zeros
= 0;
4793 struct four_ints tmp_sequence
;
4795 /* If we aren't targeting ARM, the best place to start is always at
4796 the bottom, otherwise look more closely. */
4799 for (i
= 0; i
< 32; i
+= 2)
4801 int consecutive_zeros
= 0;
4803 if (!(val
& (3 << i
)))
4805 while ((i
< 32) && !(val
& (3 << i
)))
4807 consecutive_zeros
+= 2;
4810 if (consecutive_zeros
> best_consecutive_zeros
)
4812 best_consecutive_zeros
= consecutive_zeros
;
4813 best_start
= i
- consecutive_zeros
;
4820 /* So long as it won't require any more insns to do so, it's
4821 desirable to emit a small constant (in bits 0...9) in the last
4822 insn. This way there is more chance that it can be combined with
4823 a later addressing insn to form a pre-indexed load or store
4824 operation. Consider:
4826 *((volatile int *)0xe0000100) = 1;
4827 *((volatile int *)0xe0000110) = 2;
4829 We want this to wind up as:
4833 str rB, [rA, #0x100]
4835 str rB, [rA, #0x110]
4837 rather than having to synthesize both large constants from scratch.
4839 Therefore, we calculate how many insns would be required to emit
4840 the constant starting from `best_start', and also starting from
4841 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4842 yield a shorter sequence, we may as well use zero. */
4843 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4845 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4847 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4848 if (insns2
<= insns1
)
4850 *return_sequence
= tmp_sequence
;
4858 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4860 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4861 struct four_ints
*return_sequence
, int i
)
4863 int remainder
= val
& 0xffffffff;
4866 /* Try and find a way of doing the job in either two or three
4869 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4870 location. We start at position I. This may be the MSB, or
4871 optimial_immediate_sequence may have positioned it at the largest block
4872 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4873 wrapping around to the top of the word when we drop off the bottom.
4874 In the worst case this code should produce no more than four insns.
4876 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4877 constants, shifted to any arbitrary location. We should always start
4882 unsigned int b1
, b2
, b3
, b4
;
4883 unsigned HOST_WIDE_INT result
;
4886 gcc_assert (insns
< 4);
4891 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4892 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4895 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4896 /* We can use addw/subw for the last 12 bits. */
4900 /* Use an 8-bit shifted/rotated immediate. */
4904 result
= remainder
& ((0x0ff << end
)
4905 | ((i
< end
) ? (0xff >> (32 - end
))
4912 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4913 arbitrary shifts. */
4914 i
-= TARGET_ARM
? 2 : 1;
4918 /* Next, see if we can do a better job with a thumb2 replicated
4921 We do it this way around to catch the cases like 0x01F001E0 where
4922 two 8-bit immediates would work, but a replicated constant would
4925 TODO: 16-bit constants that don't clear all the bits, but still win.
4926 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4929 b1
= (remainder
& 0xff000000) >> 24;
4930 b2
= (remainder
& 0x00ff0000) >> 16;
4931 b3
= (remainder
& 0x0000ff00) >> 8;
4932 b4
= remainder
& 0xff;
4936 /* The 8-bit immediate already found clears b1 (and maybe b2),
4937 but must leave b3 and b4 alone. */
4939 /* First try to find a 32-bit replicated constant that clears
4940 almost everything. We can assume that we can't do it in one,
4941 or else we wouldn't be here. */
4942 unsigned int tmp
= b1
& b2
& b3
& b4
;
4943 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4945 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4946 + (tmp
== b3
) + (tmp
== b4
);
4948 && (matching_bytes
>= 3
4949 || (matching_bytes
== 2
4950 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4952 /* At least 3 of the bytes match, and the fourth has at
4953 least as many bits set, or two of the bytes match
4954 and it will only require one more insn to finish. */
4962 /* Second, try to find a 16-bit replicated constant that can
4963 leave three of the bytes clear. If b2 or b4 is already
4964 zero, then we can. If the 8-bit from above would not
4965 clear b2 anyway, then we still win. */
4966 else if (b1
== b3
&& (!b2
|| !b4
4967 || (remainder
& 0x00ff0000 & ~result
)))
4969 result
= remainder
& 0xff00ff00;
4975 /* The 8-bit immediate already found clears b2 (and maybe b3)
4976 and we don't get here unless b1 is alredy clear, but it will
4977 leave b4 unchanged. */
4979 /* If we can clear b2 and b4 at once, then we win, since the
4980 8-bits couldn't possibly reach that far. */
4983 result
= remainder
& 0x00ff00ff;
4989 return_sequence
->i
[insns
++] = result
;
4990 remainder
&= ~result
;
4992 if (code
== SET
|| code
== MINUS
)
5000 /* Emit an instruction with the indicated PATTERN. If COND is
5001 non-NULL, conditionalize the execution of the instruction on COND
5005 emit_constant_insn (rtx cond
, rtx pattern
)
5008 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
5009 emit_insn (pattern
);
5012 /* As above, but extra parameter GENERATE which, if clear, suppresses
5016 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
5017 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
5018 int subtargets
, int generate
)
5022 int final_invert
= 0;
5024 int set_sign_bit_copies
= 0;
5025 int clear_sign_bit_copies
= 0;
5026 int clear_zero_bit_copies
= 0;
5027 int set_zero_bit_copies
= 0;
5028 int insns
= 0, neg_insns
, inv_insns
;
5029 unsigned HOST_WIDE_INT temp1
, temp2
;
5030 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
5031 struct four_ints
*immediates
;
5032 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
5034 /* Find out which operations are safe for a given CODE. Also do a quick
5035 check for degenerate cases; these can occur when DImode operations
5048 if (remainder
== 0xffffffff)
5051 emit_constant_insn (cond
,
5052 gen_rtx_SET (target
,
5053 GEN_INT (ARM_SIGN_EXTEND (val
))));
5059 if (reload_completed
&& rtx_equal_p (target
, source
))
5063 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5072 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
5075 if (remainder
== 0xffffffff)
5077 if (reload_completed
&& rtx_equal_p (target
, source
))
5080 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5089 if (reload_completed
&& rtx_equal_p (target
, source
))
5092 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5096 if (remainder
== 0xffffffff)
5099 emit_constant_insn (cond
,
5100 gen_rtx_SET (target
,
5101 gen_rtx_NOT (mode
, source
)));
5108 /* We treat MINUS as (val - source), since (source - val) is always
5109 passed as (source + (-val)). */
5113 emit_constant_insn (cond
,
5114 gen_rtx_SET (target
,
5115 gen_rtx_NEG (mode
, source
)));
5118 if (const_ok_for_arm (val
))
5121 emit_constant_insn (cond
,
5122 gen_rtx_SET (target
,
5123 gen_rtx_MINUS (mode
, GEN_INT (val
),
5134 /* If we can do it in one insn get out quickly. */
5135 if (const_ok_for_op (val
, code
))
5138 emit_constant_insn (cond
,
5139 gen_rtx_SET (target
,
5141 ? gen_rtx_fmt_ee (code
, mode
, source
,
5147 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5149 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
5150 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
5154 if (mode
== SImode
&& i
== 16)
5155 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5157 emit_constant_insn (cond
,
5158 gen_zero_extendhisi2
5159 (target
, gen_lowpart (HImode
, source
)));
5161 /* Extz only supports SImode, but we can coerce the operands
5163 emit_constant_insn (cond
,
5164 gen_extzv_t2 (gen_lowpart (SImode
, target
),
5165 gen_lowpart (SImode
, source
),
5166 GEN_INT (i
), const0_rtx
));
5172 /* Calculate a few attributes that may be useful for specific
5174 /* Count number of leading zeros. */
5175 for (i
= 31; i
>= 0; i
--)
5177 if ((remainder
& (1 << i
)) == 0)
5178 clear_sign_bit_copies
++;
5183 /* Count number of leading 1's. */
5184 for (i
= 31; i
>= 0; i
--)
5186 if ((remainder
& (1 << i
)) != 0)
5187 set_sign_bit_copies
++;
5192 /* Count number of trailing zero's. */
5193 for (i
= 0; i
<= 31; i
++)
5195 if ((remainder
& (1 << i
)) == 0)
5196 clear_zero_bit_copies
++;
5201 /* Count number of trailing 1's. */
5202 for (i
= 0; i
<= 31; i
++)
5204 if ((remainder
& (1 << i
)) != 0)
5205 set_zero_bit_copies
++;
5213 /* See if we can do this by sign_extending a constant that is known
5214 to be negative. This is a good, way of doing it, since the shift
5215 may well merge into a subsequent insn. */
5216 if (set_sign_bit_copies
> 1)
5218 if (const_ok_for_arm
5219 (temp1
= ARM_SIGN_EXTEND (remainder
5220 << (set_sign_bit_copies
- 1))))
5224 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5225 emit_constant_insn (cond
,
5226 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5227 emit_constant_insn (cond
,
5228 gen_ashrsi3 (target
, new_src
,
5229 GEN_INT (set_sign_bit_copies
- 1)));
5233 /* For an inverted constant, we will need to set the low bits,
5234 these will be shifted out of harm's way. */
5235 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
5236 if (const_ok_for_arm (~temp1
))
5240 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5241 emit_constant_insn (cond
,
5242 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5243 emit_constant_insn (cond
,
5244 gen_ashrsi3 (target
, new_src
,
5245 GEN_INT (set_sign_bit_copies
- 1)));
5251 /* See if we can calculate the value as the difference between two
5252 valid immediates. */
5253 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
5255 int topshift
= clear_sign_bit_copies
& ~1;
5257 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
5258 & (0xff000000 >> topshift
));
5260 /* If temp1 is zero, then that means the 9 most significant
5261 bits of remainder were 1 and we've caused it to overflow.
5262 When topshift is 0 we don't need to do anything since we
5263 can borrow from 'bit 32'. */
5264 if (temp1
== 0 && topshift
!= 0)
5265 temp1
= 0x80000000 >> (topshift
- 1);
5267 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
5269 if (const_ok_for_arm (temp2
))
5273 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5274 emit_constant_insn (cond
,
5275 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5276 emit_constant_insn (cond
,
5277 gen_addsi3 (target
, new_src
,
5285 /* See if we can generate this by setting the bottom (or the top)
5286 16 bits, and then shifting these into the other half of the
5287 word. We only look for the simplest cases, to do more would cost
5288 too much. Be careful, however, not to generate this when the
5289 alternative would take fewer insns. */
5290 if (val
& 0xffff0000)
5292 temp1
= remainder
& 0xffff0000;
5293 temp2
= remainder
& 0x0000ffff;
5295 /* Overlaps outside this range are best done using other methods. */
5296 for (i
= 9; i
< 24; i
++)
5298 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
5299 && !const_ok_for_arm (temp2
))
5301 rtx new_src
= (subtargets
5302 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5304 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
5305 source
, subtargets
, generate
);
5313 gen_rtx_ASHIFT (mode
, source
,
5320 /* Don't duplicate cases already considered. */
5321 for (i
= 17; i
< 24; i
++)
5323 if (((temp1
| (temp1
>> i
)) == remainder
)
5324 && !const_ok_for_arm (temp1
))
5326 rtx new_src
= (subtargets
5327 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5329 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
5330 source
, subtargets
, generate
);
5335 gen_rtx_SET (target
,
5338 gen_rtx_LSHIFTRT (mode
, source
,
5349 /* If we have IOR or XOR, and the constant can be loaded in a
5350 single instruction, and we can find a temporary to put it in,
5351 then this can be done in two instructions instead of 3-4. */
5353 /* TARGET can't be NULL if SUBTARGETS is 0 */
5354 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
5356 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
5360 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5362 emit_constant_insn (cond
,
5363 gen_rtx_SET (sub
, GEN_INT (val
)));
5364 emit_constant_insn (cond
,
5365 gen_rtx_SET (target
,
5366 gen_rtx_fmt_ee (code
, mode
,
5377 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
5378 and the remainder 0s for e.g. 0xfff00000)
5379 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5381 This can be done in 2 instructions by using shifts with mov or mvn.
5386 mvn r0, r0, lsr #12 */
5387 if (set_sign_bit_copies
> 8
5388 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
5392 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5393 rtx shift
= GEN_INT (set_sign_bit_copies
);
5399 gen_rtx_ASHIFT (mode
,
5404 gen_rtx_SET (target
,
5406 gen_rtx_LSHIFTRT (mode
, sub
,
5413 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5415 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5417 For eg. r0 = r0 | 0xfff
5422 if (set_zero_bit_copies
> 8
5423 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
5427 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5428 rtx shift
= GEN_INT (set_zero_bit_copies
);
5434 gen_rtx_LSHIFTRT (mode
,
5439 gen_rtx_SET (target
,
5441 gen_rtx_ASHIFT (mode
, sub
,
5447 /* This will never be reached for Thumb2 because orn is a valid
5448 instruction. This is for Thumb1 and the ARM 32 bit cases.
5450 x = y | constant (such that ~constant is a valid constant)
5452 x = ~(~y & ~constant).
5454 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
5458 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5459 emit_constant_insn (cond
,
5461 gen_rtx_NOT (mode
, source
)));
5464 sub
= gen_reg_rtx (mode
);
5465 emit_constant_insn (cond
,
5467 gen_rtx_AND (mode
, source
,
5469 emit_constant_insn (cond
,
5470 gen_rtx_SET (target
,
5471 gen_rtx_NOT (mode
, sub
)));
5478 /* See if two shifts will do 2 or more insn's worth of work. */
5479 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5481 HOST_WIDE_INT shift_mask
= ((0xffffffff
5482 << (32 - clear_sign_bit_copies
))
5485 if ((remainder
| shift_mask
) != 0xffffffff)
5487 HOST_WIDE_INT new_val
5488 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5492 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5493 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5494 new_src
, source
, subtargets
, 1);
5499 rtx targ
= subtargets
? NULL_RTX
: target
;
5500 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5501 targ
, source
, subtargets
, 0);
5507 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5508 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5510 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5511 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5517 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5519 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5521 if ((remainder
| shift_mask
) != 0xffffffff)
5523 HOST_WIDE_INT new_val
5524 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5527 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5529 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5530 new_src
, source
, subtargets
, 1);
5535 rtx targ
= subtargets
? NULL_RTX
: target
;
5537 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5538 targ
, source
, subtargets
, 0);
5544 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5545 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5547 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5548 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5560 /* Calculate what the instruction sequences would be if we generated it
5561 normally, negated, or inverted. */
5563 /* AND cannot be split into multiple insns, so invert and use BIC. */
5566 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5569 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5574 if (can_invert
|| final_invert
)
5575 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5580 immediates
= &pos_immediates
;
5582 /* Is the negated immediate sequence more efficient? */
5583 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5586 immediates
= &neg_immediates
;
5591 /* Is the inverted immediate sequence more efficient?
5592 We must allow for an extra NOT instruction for XOR operations, although
5593 there is some chance that the final 'mvn' will get optimized later. */
5594 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5597 immediates
= &inv_immediates
;
5605 /* Now output the chosen sequence as instructions. */
5608 for (i
= 0; i
< insns
; i
++)
5610 rtx new_src
, temp1_rtx
;
5612 temp1
= immediates
->i
[i
];
5614 if (code
== SET
|| code
== MINUS
)
5615 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5616 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5617 new_src
= gen_reg_rtx (mode
);
5623 else if (can_negate
)
5626 temp1
= trunc_int_for_mode (temp1
, mode
);
5627 temp1_rtx
= GEN_INT (temp1
);
5631 else if (code
== MINUS
)
5632 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5634 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5636 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5641 can_negate
= can_invert
;
5645 else if (code
== MINUS
)
5653 emit_constant_insn (cond
, gen_rtx_SET (target
,
5654 gen_rtx_NOT (mode
, source
)));
5661 /* Return TRUE if op is a constant where both the low and top words are
5662 suitable for RSB/RSC instructions. This is never true for Thumb, since
5663 we do not have RSC in that case. */
5665 arm_const_double_prefer_rsbs_rsc (rtx op
)
5667 /* Thumb lacks RSC, so we never prefer that sequence. */
5668 if (TARGET_THUMB
|| !CONST_INT_P (op
))
5670 HOST_WIDE_INT hi
, lo
;
5671 lo
= UINTVAL (op
) & 0xffffffffULL
;
5672 hi
= UINTVAL (op
) >> 32;
5673 return const_ok_for_arm (lo
) && const_ok_for_arm (hi
);
5676 /* Canonicalize a comparison so that we are more likely to recognize it.
5677 This can be done for a few constant compares, where we can make the
5678 immediate value easier to load. */
5681 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5682 bool op0_preserve_value
)
5685 unsigned HOST_WIDE_INT i
, maxval
;
5687 mode
= GET_MODE (*op0
);
5688 if (mode
== VOIDmode
)
5689 mode
= GET_MODE (*op1
);
5691 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5693 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5694 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5695 either reversed or (for constant OP1) adjusted to GE/LT.
5696 Similarly for GTU/LEU in Thumb mode. */
5700 if (*code
== GT
|| *code
== LE
5701 || *code
== GTU
|| *code
== LEU
)
5703 /* Missing comparison. First try to use an available
5705 if (CONST_INT_P (*op1
))
5714 /* Try to convert to GE/LT, unless that would be more
5716 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5717 && arm_const_double_prefer_rsbs_rsc (*op1
))
5719 *op1
= GEN_INT (i
+ 1);
5720 *code
= *code
== GT
? GE
: LT
;
5724 /* GT maxval is always false, LE maxval is always true.
5725 We can't fold that away here as we must make a
5726 comparison, but we can fold them to comparisons
5727 with the same result that can be handled:
5728 op0 GT maxval -> op0 LT minval
5729 op0 LE maxval -> op0 GE minval
5730 where minval = (-maxval - 1). */
5731 *op1
= GEN_INT (-maxval
- 1);
5732 *code
= *code
== GT
? LT
: GE
;
5738 if (i
!= ~((unsigned HOST_WIDE_INT
) 0))
5740 /* Try to convert to GEU/LTU, unless that would
5741 be more expensive. */
5742 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5743 && arm_const_double_prefer_rsbs_rsc (*op1
))
5745 *op1
= GEN_INT (i
+ 1);
5746 *code
= *code
== GTU
? GEU
: LTU
;
5750 /* GTU ~0 is always false, LEU ~0 is always true.
5751 We can't fold that away here as we must make a
5752 comparison, but we can fold them to comparisons
5753 with the same result that can be handled:
5754 op0 GTU ~0 -> op0 LTU 0
5755 op0 LEU ~0 -> op0 GEU 0. */
5757 *code
= *code
== GTU
? LTU
: GEU
;
5766 if (!op0_preserve_value
)
5768 std::swap (*op0
, *op1
);
5769 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5775 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5776 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5777 to facilitate possible combining with a cmp into 'ands'. */
5779 && GET_CODE (*op0
) == ZERO_EXTEND
5780 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5781 && GET_MODE (XEXP (*op0
, 0)) == QImode
5782 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5783 && subreg_lowpart_p (XEXP (*op0
, 0))
5784 && *op1
== const0_rtx
)
5785 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5788 /* Comparisons smaller than DImode. Only adjust comparisons against
5789 an out-of-range constant. */
5790 if (!CONST_INT_P (*op1
)
5791 || const_ok_for_arm (INTVAL (*op1
))
5792 || const_ok_for_arm (- INTVAL (*op1
)))
5806 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5808 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5809 *code
= *code
== GT
? GE
: LT
;
5817 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5819 *op1
= GEN_INT (i
- 1);
5820 *code
= *code
== GE
? GT
: LE
;
5827 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5828 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5830 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5831 *code
= *code
== GTU
? GEU
: LTU
;
5839 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5841 *op1
= GEN_INT (i
- 1);
5842 *code
= *code
== GEU
? GTU
: LEU
;
5853 /* Define how to find the value returned by a function. */
5856 arm_function_value(const_tree type
, const_tree func
,
5857 bool outgoing ATTRIBUTE_UNUSED
)
5860 int unsignedp ATTRIBUTE_UNUSED
;
5861 rtx r ATTRIBUTE_UNUSED
;
5863 mode
= TYPE_MODE (type
);
5865 if (TARGET_AAPCS_BASED
)
5866 return aapcs_allocate_return_reg (mode
, type
, func
);
5868 /* Promote integer types. */
5869 if (INTEGRAL_TYPE_P (type
))
5870 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5872 /* Promotes small structs returned in a register to full-word size
5873 for big-endian AAPCS. */
5874 if (arm_return_in_msb (type
))
5876 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5877 if (size
% UNITS_PER_WORD
!= 0)
5879 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5880 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5884 return arm_libcall_value_1 (mode
);
5887 /* libcall hashtable helpers. */
5889 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5891 static inline hashval_t
hash (const rtx_def
*);
5892 static inline bool equal (const rtx_def
*, const rtx_def
*);
5893 static inline void remove (rtx_def
*);
5897 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5899 return rtx_equal_p (p1
, p2
);
5903 libcall_hasher::hash (const rtx_def
*p1
)
5905 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5908 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5911 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5913 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5917 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5919 static bool init_done
= false;
5920 static libcall_table_type
*libcall_htab
= NULL
;
5926 libcall_htab
= new libcall_table_type (31);
5927 add_libcall (libcall_htab
,
5928 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5929 add_libcall (libcall_htab
,
5930 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5931 add_libcall (libcall_htab
,
5932 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5933 add_libcall (libcall_htab
,
5934 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5936 add_libcall (libcall_htab
,
5937 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5938 add_libcall (libcall_htab
,
5939 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5940 add_libcall (libcall_htab
,
5941 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5942 add_libcall (libcall_htab
,
5943 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5945 add_libcall (libcall_htab
,
5946 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5947 add_libcall (libcall_htab
,
5948 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5949 add_libcall (libcall_htab
,
5950 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5951 add_libcall (libcall_htab
,
5952 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5953 add_libcall (libcall_htab
,
5954 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5955 add_libcall (libcall_htab
,
5956 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5957 add_libcall (libcall_htab
,
5958 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5959 add_libcall (libcall_htab
,
5960 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5961 add_libcall (libcall_htab
,
5962 convert_optab_libfunc (sfix_optab
, SImode
, SFmode
));
5963 add_libcall (libcall_htab
,
5964 convert_optab_libfunc (ufix_optab
, SImode
, SFmode
));
5966 /* Values from double-precision helper functions are returned in core
5967 registers if the selected core only supports single-precision
5968 arithmetic, even if we are using the hard-float ABI. The same is
5969 true for single-precision helpers except in case of MVE, because in
5970 MVE we will be using the hard-float ABI on a CPU which doesn't support
5971 single-precision operations in hardware. In MVE the following check
5972 enables use of emulation for the single-precision arithmetic
5974 if (TARGET_HAVE_MVE
)
5976 add_libcall (libcall_htab
, optab_libfunc (add_optab
, SFmode
));
5977 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, SFmode
));
5978 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, SFmode
));
5979 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, SFmode
));
5980 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, SFmode
));
5981 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, SFmode
));
5982 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, SFmode
));
5983 add_libcall (libcall_htab
, optab_libfunc (le_optab
, SFmode
));
5984 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, SFmode
));
5985 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, SFmode
));
5986 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, SFmode
));
5988 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5989 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5990 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5991 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5992 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5993 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5994 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5995 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5996 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5997 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5998 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5999 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
6001 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
6003 add_libcall (libcall_htab
,
6004 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
6007 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
6011 arm_libcall_value_1 (machine_mode mode
)
6013 if (TARGET_AAPCS_BASED
)
6014 return aapcs_libcall_value (mode
);
6015 else if (TARGET_IWMMXT_ABI
6016 && arm_vector_mode_supported_p (mode
))
6017 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
6019 return gen_rtx_REG (mode
, ARG_REGISTER (1));
6022 /* Define how to find the value returned by a library function
6023 assuming the value has mode MODE. */
6026 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
6028 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
6029 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6031 /* The following libcalls return their result in integer registers,
6032 even though they return a floating point value. */
6033 if (arm_libcall_uses_aapcs_base (libcall
))
6034 return gen_rtx_REG (mode
, ARG_REGISTER(1));
6038 return arm_libcall_value_1 (mode
);
6041 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6044 arm_function_value_regno_p (const unsigned int regno
)
6046 if (regno
== ARG_REGISTER (1)
6048 && TARGET_AAPCS_BASED
6049 && TARGET_HARD_FLOAT
6050 && regno
== FIRST_VFP_REGNUM
)
6051 || (TARGET_IWMMXT_ABI
6052 && regno
== FIRST_IWMMXT_REGNUM
))
6058 /* Determine the amount of memory needed to store the possible return
6059 registers of an untyped call. */
6061 arm_apply_result_size (void)
6067 if (TARGET_HARD_FLOAT_ABI
)
6069 if (TARGET_IWMMXT_ABI
)
6076 /* Decide whether TYPE should be returned in memory (true)
6077 or in a register (false). FNTYPE is the type of the function making
6080 arm_return_in_memory (const_tree type
, const_tree fntype
)
6084 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
6086 if (TARGET_AAPCS_BASED
)
6088 /* Simple, non-aggregate types (ie not including vectors and
6089 complex) are always returned in a register (or registers).
6090 We don't care about which register here, so we can short-cut
6091 some of the detail. */
6092 if (!AGGREGATE_TYPE_P (type
)
6093 && TREE_CODE (type
) != VECTOR_TYPE
6094 && TREE_CODE (type
) != COMPLEX_TYPE
)
6097 /* Any return value that is no larger than one word can be
6099 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
6102 /* Check any available co-processors to see if they accept the
6103 type as a register candidate (VFP, for example, can return
6104 some aggregates in consecutive registers). These aren't
6105 available if the call is variadic. */
6106 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
6109 /* Vector values should be returned using ARM registers, not
6110 memory (unless they're over 16 bytes, which will break since
6111 we only have four call-clobbered registers to play with). */
6112 if (TREE_CODE (type
) == VECTOR_TYPE
)
6113 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6115 /* The rest go in memory. */
6119 if (TREE_CODE (type
) == VECTOR_TYPE
)
6120 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6122 if (!AGGREGATE_TYPE_P (type
) &&
6123 (TREE_CODE (type
) != VECTOR_TYPE
))
6124 /* All simple types are returned in registers. */
6127 if (arm_abi
!= ARM_ABI_APCS
)
6129 /* ATPCS and later return aggregate types in memory only if they are
6130 larger than a word (or are variable size). */
6131 return (size
< 0 || size
> UNITS_PER_WORD
);
6134 /* For the arm-wince targets we choose to be compatible with Microsoft's
6135 ARM and Thumb compilers, which always return aggregates in memory. */
6137 /* All structures/unions bigger than one word are returned in memory.
6138 Also catch the case where int_size_in_bytes returns -1. In this case
6139 the aggregate is either huge or of variable size, and in either case
6140 we will want to return it via memory and not in a register. */
6141 if (size
< 0 || size
> UNITS_PER_WORD
)
6144 if (TREE_CODE (type
) == RECORD_TYPE
)
6148 /* For a struct the APCS says that we only return in a register
6149 if the type is 'integer like' and every addressable element
6150 has an offset of zero. For practical purposes this means
6151 that the structure can have at most one non bit-field element
6152 and that this element must be the first one in the structure. */
6154 /* Find the first field, ignoring non FIELD_DECL things which will
6155 have been created by C++. */
6156 /* NOTE: This code is deprecated and has not been updated to handle
6157 DECL_FIELD_ABI_IGNORED. */
6158 for (field
= TYPE_FIELDS (type
);
6159 field
&& TREE_CODE (field
) != FIELD_DECL
;
6160 field
= DECL_CHAIN (field
))
6164 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6166 /* Check that the first field is valid for returning in a register. */
6168 /* ... Floats are not allowed */
6169 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6172 /* ... Aggregates that are not themselves valid for returning in
6173 a register are not allowed. */
6174 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6177 /* Now check the remaining fields, if any. Only bitfields are allowed,
6178 since they are not addressable. */
6179 for (field
= DECL_CHAIN (field
);
6181 field
= DECL_CHAIN (field
))
6183 if (TREE_CODE (field
) != FIELD_DECL
)
6186 if (!DECL_BIT_FIELD_TYPE (field
))
6193 if (TREE_CODE (type
) == UNION_TYPE
)
6197 /* Unions can be returned in registers if every element is
6198 integral, or can be returned in an integer register. */
6199 for (field
= TYPE_FIELDS (type
);
6201 field
= DECL_CHAIN (field
))
6203 if (TREE_CODE (field
) != FIELD_DECL
)
6206 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6209 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6215 #endif /* not ARM_WINCE */
6217 /* Return all other types in memory. */
6221 const struct pcs_attribute_arg
6225 } pcs_attribute_args
[] =
6227 {"aapcs", ARM_PCS_AAPCS
},
6228 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
6230 /* We could recognize these, but changes would be needed elsewhere
6231 * to implement them. */
6232 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
6233 {"atpcs", ARM_PCS_ATPCS
},
6234 {"apcs", ARM_PCS_APCS
},
6236 {NULL
, ARM_PCS_UNKNOWN
}
6240 arm_pcs_from_attribute (tree attr
)
6242 const struct pcs_attribute_arg
*ptr
;
6245 /* Get the value of the argument. */
6246 if (TREE_VALUE (attr
) == NULL_TREE
6247 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
6248 return ARM_PCS_UNKNOWN
;
6250 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
6252 /* Check it against the list of known arguments. */
6253 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
6254 if (streq (arg
, ptr
->arg
))
6257 /* An unrecognized interrupt type. */
6258 return ARM_PCS_UNKNOWN
;
6261 /* Get the PCS variant to use for this call. TYPE is the function's type
6262 specification, DECL is the specific declartion. DECL may be null if
6263 the call could be indirect or if this is a library call. */
6265 arm_get_pcs_model (const_tree type
, const_tree decl ATTRIBUTE_UNUSED
)
6267 bool user_convention
= false;
6268 enum arm_pcs user_pcs
= arm_pcs_default
;
6273 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
6276 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
6277 user_convention
= true;
6280 if (TARGET_AAPCS_BASED
)
6282 /* Detect varargs functions. These always use the base rules
6283 (no argument is ever a candidate for a co-processor
6285 bool base_rules
= stdarg_p (type
);
6287 if (user_convention
)
6289 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
6290 sorry ("non-AAPCS derived PCS variant");
6291 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
6292 error ("variadic functions must use the base AAPCS variant");
6296 return ARM_PCS_AAPCS
;
6297 else if (user_convention
)
6300 /* Unfortunately, this is not safe and can lead to wrong code
6301 being generated (PR96882). Not all calls into the back-end
6302 pass the DECL, so it is unsafe to make any PCS-changing
6303 decisions based on it. In particular the RETURN_IN_MEMORY
6304 hook is only ever passed a TYPE. This needs revisiting to
6305 see if there are any partial improvements that can be
6307 else if (decl
&& flag_unit_at_a_time
)
6309 /* Local functions never leak outside this compilation unit,
6310 so we are free to use whatever conventions are
6312 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6313 cgraph_node
*local_info_node
6314 = cgraph_node::local_info_node (CONST_CAST_TREE (decl
));
6315 if (local_info_node
&& local_info_node
->local
)
6316 return ARM_PCS_AAPCS_LOCAL
;
6320 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
6321 sorry ("PCS variant");
6323 /* For everything else we use the target's default. */
6324 return arm_pcs_default
;
6329 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6330 const_tree fntype ATTRIBUTE_UNUSED
,
6331 rtx libcall ATTRIBUTE_UNUSED
,
6332 const_tree fndecl ATTRIBUTE_UNUSED
)
6334 /* Record the unallocated VFP registers. */
6335 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
6336 pcum
->aapcs_vfp_reg_alloc
= 0;
6339 /* Bitmasks that indicate whether earlier versions of GCC would have
6340 taken a different path through the ABI logic. This should result in
6341 a -Wpsabi warning if the earlier path led to a different ABI decision.
6343 WARN_PSABI_EMPTY_CXX17_BASE
6344 Indicates that the type includes an artificial empty C++17 base field
6345 that, prior to GCC 10.1, would prevent the type from being treated as
6346 a HFA or HVA. See PR94711 for details.
6348 WARN_PSABI_NO_UNIQUE_ADDRESS
6349 Indicates that the type includes an empty [[no_unique_address]] field
6350 that, prior to GCC 10.1, would prevent the type from being treated as
6352 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE
= 1U << 0;
6353 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS
= 1U << 1;
6354 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD
= 1U << 2;
6356 /* Walk down the type tree of TYPE counting consecutive base elements.
6357 If *MODEP is VOIDmode, then set it to the first valid floating point
6358 type. If a non-floating point type is found, or if a floating point
6359 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6360 otherwise return the count in the sub-tree.
6362 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6363 function has changed its behavior relative to earlier versions of GCC.
6364 Normally the argument should be nonnull and point to a zero-initialized
6365 variable. The function then records whether the ABI decision might
6366 be affected by a known fix to the ABI logic, setting the associated
6367 WARN_PSABI_* bits if so.
6369 When the argument is instead a null pointer, the function tries to
6370 simulate the behavior of GCC before all such ABI fixes were made.
6371 This is useful to check whether the function returns something
6372 different after the ABI fixes. */
6374 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
,
6375 unsigned int *warn_psabi_flags
)
6380 switch (TREE_CODE (type
))
6383 mode
= TYPE_MODE (type
);
6384 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
&& mode
!= BFmode
)
6387 if (*modep
== VOIDmode
)
6396 mode
= TYPE_MODE (TREE_TYPE (type
));
6397 if (mode
!= DFmode
&& mode
!= SFmode
)
6400 if (*modep
== VOIDmode
)
6409 /* Use V2SImode and V4SImode as representatives of all 64-bit
6410 and 128-bit vector types, whether or not those modes are
6411 supported with the present options. */
6412 size
= int_size_in_bytes (type
);
6425 if (*modep
== VOIDmode
)
6428 /* Vector modes are considered to be opaque: two vectors are
6429 equivalent for the purposes of being homogeneous aggregates
6430 if they are the same size. */
6439 tree index
= TYPE_DOMAIN (type
);
6441 /* Can't handle incomplete types nor sizes that are not
6443 if (!COMPLETE_TYPE_P (type
)
6444 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6447 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
,
6451 || !TYPE_MAX_VALUE (index
)
6452 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
6453 || !TYPE_MIN_VALUE (index
)
6454 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
6458 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
6459 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
6461 /* There must be no padding. */
6462 if (wi::to_wide (TYPE_SIZE (type
))
6463 != count
* GET_MODE_BITSIZE (*modep
))
6475 /* Can't handle incomplete types nor sizes that are not
6477 if (!COMPLETE_TYPE_P (type
)
6478 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6481 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6483 if (TREE_CODE (field
) != FIELD_DECL
)
6486 if (DECL_FIELD_ABI_IGNORED (field
))
6488 /* See whether this is something that earlier versions of
6489 GCC failed to ignore. */
6491 if (lookup_attribute ("no_unique_address",
6492 DECL_ATTRIBUTES (field
)))
6493 flag
= WARN_PSABI_NO_UNIQUE_ADDRESS
;
6494 else if (cxx17_empty_base_field_p (field
))
6495 flag
= WARN_PSABI_EMPTY_CXX17_BASE
;
6497 /* No compatibility problem. */
6500 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6501 if (warn_psabi_flags
)
6503 *warn_psabi_flags
|= flag
;
6507 /* A zero-width bitfield may affect layout in some
6508 circumstances, but adds no members. The determination
6509 of whether or not a type is an HFA is performed after
6510 layout is complete, so if the type still looks like an
6511 HFA afterwards, it is still classed as one. This is
6512 potentially an ABI break for the hard-float ABI. */
6513 else if (DECL_BIT_FIELD (field
)
6514 && integer_zerop (DECL_SIZE (field
)))
6516 /* Prior to GCC-12 these fields were striped early,
6517 hiding them from the back-end entirely and
6518 resulting in the correct behaviour for argument
6519 passing. Simulate that old behaviour without
6520 generating a warning. */
6521 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field
))
6523 if (warn_psabi_flags
)
6525 *warn_psabi_flags
|= WARN_PSABI_ZERO_WIDTH_BITFIELD
;
6530 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6537 /* There must be no padding. */
6538 if (wi::to_wide (TYPE_SIZE (type
))
6539 != count
* GET_MODE_BITSIZE (*modep
))
6546 case QUAL_UNION_TYPE
:
6548 /* These aren't very interesting except in a degenerate case. */
6553 /* Can't handle incomplete types nor sizes that are not
6555 if (!COMPLETE_TYPE_P (type
)
6556 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6559 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6561 if (TREE_CODE (field
) != FIELD_DECL
)
6564 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6568 count
= count
> sub_count
? count
: sub_count
;
6571 /* There must be no padding. */
6572 if (wi::to_wide (TYPE_SIZE (type
))
6573 != count
* GET_MODE_BITSIZE (*modep
))
6586 /* Return true if PCS_VARIANT should use VFP registers. */
6588 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
6590 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
6592 static bool seen_thumb1_vfp
= false;
6594 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
6596 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6597 /* sorry() is not immediately fatal, so only display this once. */
6598 seen_thumb1_vfp
= true;
6604 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
6607 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
6608 (TARGET_VFP_DOUBLE
|| !is_double
));
6611 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6612 suitable for passing or returning in VFP registers for the PCS
6613 variant selected. If it is, then *BASE_MODE is updated to contain
6614 a machine mode describing each element of the argument's type and
6615 *COUNT to hold the number of such elements. */
6617 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
6618 machine_mode mode
, const_tree type
,
6619 machine_mode
*base_mode
, int *count
)
6621 machine_mode new_mode
= VOIDmode
;
6623 /* If we have the type information, prefer that to working things
6624 out from the mode. */
6627 unsigned int warn_psabi_flags
= 0;
6628 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
,
6630 if (ag_count
> 0 && ag_count
<= 4)
6632 static unsigned last_reported_type_uid
;
6633 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (type
));
6637 && uid
!= last_reported_type_uid
6638 && ((alt
= aapcs_vfp_sub_candidate (type
, &new_mode
, NULL
))
6642 = CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
6644 = CHANGES_ROOT_URL
"gcc-12/changes.html#zero_width_bitfields";
6645 gcc_assert (alt
== -1);
6646 last_reported_type_uid
= uid
;
6647 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6649 if (warn_psabi_flags
& WARN_PSABI_NO_UNIQUE_ADDRESS
)
6650 inform (input_location
, "parameter passing for argument of "
6651 "type %qT with %<[[no_unique_address]]%> members "
6652 "changed %{in GCC 10.1%}",
6653 TYPE_MAIN_VARIANT (type
), url10
);
6654 else if (warn_psabi_flags
& WARN_PSABI_EMPTY_CXX17_BASE
)
6655 inform (input_location
, "parameter passing for argument of "
6656 "type %qT when C++17 is enabled changed to match "
6657 "C++14 %{in GCC 10.1%}",
6658 TYPE_MAIN_VARIANT (type
), url10
);
6659 else if (warn_psabi_flags
& WARN_PSABI_ZERO_WIDTH_BITFIELD
)
6660 inform (input_location
, "parameter passing for argument of "
6661 "type %qT changed %{in GCC 12.1%}",
6662 TYPE_MAIN_VARIANT (type
), url12
);
6669 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6670 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6671 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6676 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6679 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6685 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6688 *base_mode
= new_mode
;
6690 if (TARGET_GENERAL_REGS_ONLY
)
6691 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6698 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6699 machine_mode mode
, const_tree type
)
6701 int count ATTRIBUTE_UNUSED
;
6702 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6704 if (!use_vfp_abi (pcs_variant
, false))
6706 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6711 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6714 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6717 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6718 &pcum
->aapcs_vfp_rmode
,
6719 &pcum
->aapcs_vfp_rcount
);
6722 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6723 for the behaviour of this function. */
6726 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6727 const_tree type ATTRIBUTE_UNUSED
)
6730 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6731 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6732 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6735 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6736 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6738 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6740 || (mode
== TImode
&& ! (TARGET_NEON
|| TARGET_HAVE_MVE
))
6741 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6744 int rcount
= pcum
->aapcs_vfp_rcount
;
6746 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6748 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6750 /* Avoid using unsupported vector modes. */
6751 if (rmode
== V2SImode
)
6753 else if (rmode
== V4SImode
)
6760 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6761 for (i
= 0; i
< rcount
; i
++)
6763 rtx tmp
= gen_rtx_REG (rmode
,
6764 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6765 tmp
= gen_rtx_EXPR_LIST
6767 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6768 XVECEXP (par
, 0, i
) = tmp
;
6771 pcum
->aapcs_reg
= par
;
6774 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6780 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6781 comment there for the behaviour of this function. */
6784 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6786 const_tree type ATTRIBUTE_UNUSED
)
6788 if (!use_vfp_abi (pcs_variant
, false))
6792 || (GET_MODE_CLASS (mode
) == MODE_INT
6793 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6794 && !(TARGET_NEON
|| TARGET_HAVE_MVE
)))
6797 machine_mode ag_mode
;
6802 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6805 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6807 if (ag_mode
== V2SImode
)
6809 else if (ag_mode
== V4SImode
)
6815 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6816 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6817 for (i
= 0; i
< count
; i
++)
6819 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6820 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6821 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6822 XVECEXP (par
, 0, i
) = tmp
;
6828 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6832 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6833 machine_mode mode ATTRIBUTE_UNUSED
,
6834 const_tree type ATTRIBUTE_UNUSED
)
6836 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6837 pcum
->aapcs_vfp_reg_alloc
= 0;
6841 #define AAPCS_CP(X) \
6843 aapcs_ ## X ## _cum_init, \
6844 aapcs_ ## X ## _is_call_candidate, \
6845 aapcs_ ## X ## _allocate, \
6846 aapcs_ ## X ## _is_return_candidate, \
6847 aapcs_ ## X ## _allocate_return_reg, \
6848 aapcs_ ## X ## _advance \
6851 /* Table of co-processors that can be used to pass arguments in
6852 registers. Idealy no arugment should be a candidate for more than
6853 one co-processor table entry, but the table is processed in order
6854 and stops after the first match. If that entry then fails to put
6855 the argument into a co-processor register, the argument will go on
6859 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6860 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6862 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6863 BLKmode) is a candidate for this co-processor's registers; this
6864 function should ignore any position-dependent state in
6865 CUMULATIVE_ARGS and only use call-type dependent information. */
6866 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6868 /* Return true if the argument does get a co-processor register; it
6869 should set aapcs_reg to an RTX of the register allocated as is
6870 required for a return from FUNCTION_ARG. */
6871 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6873 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6874 be returned in this co-processor's registers. */
6875 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6877 /* Allocate and return an RTX element to hold the return type of a call. This
6878 routine must not fail and will only be called if is_return_candidate
6879 returned true with the same parameters. */
6880 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6882 /* Finish processing this argument and prepare to start processing
6884 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6885 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6893 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6898 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6899 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6906 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6908 /* We aren't passed a decl, so we can't check that a call is local.
6909 However, it isn't clear that that would be a win anyway, since it
6910 might limit some tail-calling opportunities. */
6911 enum arm_pcs pcs_variant
;
6915 const_tree fndecl
= NULL_TREE
;
6917 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6920 fntype
= TREE_TYPE (fntype
);
6923 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6926 pcs_variant
= arm_pcs_default
;
6928 if (pcs_variant
!= ARM_PCS_AAPCS
)
6932 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6933 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6942 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6945 /* We aren't passed a decl, so we can't check that a call is local.
6946 However, it isn't clear that that would be a win anyway, since it
6947 might limit some tail-calling opportunities. */
6948 enum arm_pcs pcs_variant
;
6949 int unsignedp ATTRIBUTE_UNUSED
;
6953 const_tree fndecl
= NULL_TREE
;
6955 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6958 fntype
= TREE_TYPE (fntype
);
6961 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6964 pcs_variant
= arm_pcs_default
;
6966 /* Promote integer types. */
6967 if (type
&& INTEGRAL_TYPE_P (type
))
6968 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6970 if (pcs_variant
!= ARM_PCS_AAPCS
)
6974 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6975 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6977 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6981 /* Promotes small structs returned in a register to full-word size
6982 for big-endian AAPCS. */
6983 if (type
&& arm_return_in_msb (type
))
6985 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6986 if (size
% UNITS_PER_WORD
!= 0)
6988 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6989 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6993 return gen_rtx_REG (mode
, R0_REGNUM
);
6997 aapcs_libcall_value (machine_mode mode
)
6999 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
7000 && GET_MODE_SIZE (mode
) <= 4)
7003 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
7006 /* Lay out a function argument using the AAPCS rules. The rule
7007 numbers referred to here are those in the AAPCS. */
7009 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
7010 const_tree type
, bool named
)
7015 /* We only need to do this once per argument. */
7016 if (pcum
->aapcs_arg_processed
)
7019 pcum
->aapcs_arg_processed
= true;
7021 /* Special case: if named is false then we are handling an incoming
7022 anonymous argument which is on the stack. */
7026 /* Is this a potential co-processor register candidate? */
7027 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
7029 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
7030 pcum
->aapcs_cprc_slot
= slot
;
7032 /* We don't have to apply any of the rules from part B of the
7033 preparation phase, these are handled elsewhere in the
7038 /* A Co-processor register candidate goes either in its own
7039 class of registers or on the stack. */
7040 if (!pcum
->aapcs_cprc_failed
[slot
])
7042 /* C1.cp - Try to allocate the argument to co-processor
7044 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
7047 /* C2.cp - Put the argument on the stack and note that we
7048 can't assign any more candidates in this slot. We also
7049 need to note that we have allocated stack space, so that
7050 we won't later try to split a non-cprc candidate between
7051 core registers and the stack. */
7052 pcum
->aapcs_cprc_failed
[slot
] = true;
7053 pcum
->can_split
= false;
7056 /* We didn't get a register, so this argument goes on the
7058 gcc_assert (pcum
->can_split
== false);
7063 /* C3 - For double-word aligned arguments, round the NCRN up to the
7064 next even number. */
7065 ncrn
= pcum
->aapcs_ncrn
;
7068 int res
= arm_needs_doubleword_align (mode
, type
);
7069 /* Only warn during RTL expansion of call stmts, otherwise we would
7070 warn e.g. during gimplification even on functions that will be
7071 always inlined, and we'd warn multiple times. Don't warn when
7072 called in expand_function_start either, as we warn instead in
7073 arm_function_arg_boundary in that case. */
7074 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
7075 inform (input_location
, "parameter passing for argument of type "
7076 "%qT changed in GCC 7.1", type
);
7081 nregs
= ARM_NUM_REGS2(mode
, type
);
7083 /* Sigh, this test should really assert that nregs > 0, but a GCC
7084 extension allows empty structs and then gives them empty size; it
7085 then allows such a structure to be passed by value. For some of
7086 the code below we have to pretend that such an argument has
7087 non-zero size so that we 'locate' it correctly either in
7088 registers or on the stack. */
7089 gcc_assert (nregs
>= 0);
7091 nregs2
= nregs
? nregs
: 1;
7093 /* C4 - Argument fits entirely in core registers. */
7094 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
7096 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7097 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
7101 /* C5 - Some core registers left and there are no arguments already
7102 on the stack: split this argument between the remaining core
7103 registers and the stack. */
7104 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
7106 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7107 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
7108 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
7112 /* C6 - NCRN is set to 4. */
7113 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
7115 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
7119 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7120 for a call to a function whose data type is FNTYPE.
7121 For a library call, FNTYPE is NULL. */
7123 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
7125 tree fndecl ATTRIBUTE_UNUSED
)
7127 /* Long call handling. */
7129 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
7131 pcum
->pcs_variant
= arm_pcs_default
;
7133 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7135 if (arm_libcall_uses_aapcs_base (libname
))
7136 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
7138 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
7139 pcum
->aapcs_reg
= NULL_RTX
;
7140 pcum
->aapcs_partial
= 0;
7141 pcum
->aapcs_arg_processed
= false;
7142 pcum
->aapcs_cprc_slot
= -1;
7143 pcum
->can_split
= true;
7145 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
7149 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
7151 pcum
->aapcs_cprc_failed
[i
] = false;
7152 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
7160 /* On the ARM, the offset starts at 0. */
7162 pcum
->iwmmxt_nregs
= 0;
7163 pcum
->can_split
= true;
7165 /* Varargs vectors are treated the same as long long.
7166 named_count avoids having to change the way arm handles 'named' */
7167 pcum
->named_count
= 0;
7170 if (TARGET_REALLY_IWMMXT
&& fntype
)
7174 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
7176 fn_arg
= TREE_CHAIN (fn_arg
))
7177 pcum
->named_count
+= 1;
7179 if (! pcum
->named_count
)
7180 pcum
->named_count
= INT_MAX
;
7184 /* Return 2 if double word alignment is required for argument passing,
7185 but wasn't required before the fix for PR88469.
7186 Return 1 if double word alignment is required for argument passing.
7187 Return -1 if double word alignment used to be required for argument
7188 passing before PR77728 ABI fix, but is not required anymore.
7189 Return 0 if double word alignment is not required and wasn't requried
7192 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
7195 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
7197 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7198 if (!AGGREGATE_TYPE_P (type
))
7199 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
7201 /* Array types: Use member alignment of element type. */
7202 if (TREE_CODE (type
) == ARRAY_TYPE
)
7203 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
7207 /* Record/aggregate types: Use greatest member alignment of any member.
7209 Note that we explicitly consider zero-sized fields here, even though
7210 they don't map to AAPCS machine types. For example, in:
7212 struct __attribute__((aligned(8))) empty {};
7215 [[no_unique_address]] empty e;
7219 "s" contains only one Fundamental Data Type (the int field)
7220 but gains 8-byte alignment and size thanks to "e". */
7221 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7222 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
7224 if (TREE_CODE (field
) == FIELD_DECL
)
7227 /* Before PR77728 fix, we were incorrectly considering also
7228 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7229 Make sure we can warn about that with -Wpsabi. */
7232 else if (TREE_CODE (field
) == FIELD_DECL
7233 && DECL_BIT_FIELD_TYPE (field
)
7234 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field
)) > PARM_BOUNDARY
)
7244 /* Determine where to put an argument to a function.
7245 Value is zero to push the argument on the stack,
7246 or a hard register in which to store the argument.
7248 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7249 the preceding args and about the function being called.
7250 ARG is a description of the argument.
7252 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7253 other arguments are passed on the stack. If (NAMED == 0) (which happens
7254 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7255 defined), say it is passed in the stack (function_prologue will
7256 indeed make it pass in the stack if necessary). */
7259 arm_function_arg (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
7261 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7264 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7265 a call insn (op3 of a call_value insn). */
7266 if (arg
.end_marker_p ())
7269 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7271 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7272 return pcum
->aapcs_reg
;
7275 /* Varargs vectors are treated the same as long long.
7276 named_count avoids having to change the way arm handles 'named' */
7277 if (TARGET_IWMMXT_ABI
7278 && arm_vector_mode_supported_p (arg
.mode
)
7279 && pcum
->named_count
> pcum
->nargs
+ 1)
7281 if (pcum
->iwmmxt_nregs
<= 9)
7282 return gen_rtx_REG (arg
.mode
,
7283 pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
7286 pcum
->can_split
= false;
7291 /* Put doubleword aligned quantities in even register pairs. */
7292 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
7294 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
7295 if (res
< 0 && warn_psabi
)
7296 inform (input_location
, "parameter passing for argument of type "
7297 "%qT changed in GCC 7.1", arg
.type
);
7301 if (res
> 1 && warn_psabi
)
7302 inform (input_location
, "parameter passing for argument of type "
7303 "%qT changed in GCC 9.1", arg
.type
);
7307 /* Only allow splitting an arg between regs and memory if all preceding
7308 args were allocated to regs. For args passed by reference we only count
7309 the reference pointer. */
7310 if (pcum
->can_split
)
7313 nregs
= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
7315 if (!arg
.named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
7318 return gen_rtx_REG (arg
.mode
, pcum
->nregs
);
7322 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
7324 if (!ARM_DOUBLEWORD_ALIGN
)
7325 return PARM_BOUNDARY
;
7327 int res
= arm_needs_doubleword_align (mode
, type
);
7328 if (res
< 0 && warn_psabi
)
7329 inform (input_location
, "parameter passing for argument of type %qT "
7330 "changed in GCC 7.1", type
);
7331 if (res
> 1 && warn_psabi
)
7332 inform (input_location
, "parameter passing for argument of type "
7333 "%qT changed in GCC 9.1", type
);
7335 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
7339 arm_arg_partial_bytes (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
7341 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7342 int nregs
= pcum
->nregs
;
7344 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7346 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7347 return pcum
->aapcs_partial
;
7350 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (arg
.mode
))
7353 if (NUM_ARG_REGS
> nregs
7354 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (arg
.mode
, arg
.type
))
7356 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
7361 /* Update the data in PCUM to advance over argument ARG. */
7364 arm_function_arg_advance (cumulative_args_t pcum_v
,
7365 const function_arg_info
&arg
)
7367 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7369 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7371 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7373 if (pcum
->aapcs_cprc_slot
>= 0)
7375 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, arg
.mode
,
7377 pcum
->aapcs_cprc_slot
= -1;
7380 /* Generic stuff. */
7381 pcum
->aapcs_arg_processed
= false;
7382 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
7383 pcum
->aapcs_reg
= NULL_RTX
;
7384 pcum
->aapcs_partial
= 0;
7389 if (arm_vector_mode_supported_p (arg
.mode
)
7390 && pcum
->named_count
> pcum
->nargs
7391 && TARGET_IWMMXT_ABI
)
7392 pcum
->iwmmxt_nregs
+= 1;
7394 pcum
->nregs
+= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
7398 /* Variable sized types are passed by reference. This is a GCC
7399 extension to the ARM ABI. */
7402 arm_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
7404 return arg
.type
&& TREE_CODE (TYPE_SIZE (arg
.type
)) != INTEGER_CST
;
7407 /* Encode the current state of the #pragma [no_]long_calls. */
7410 OFF
, /* No #pragma [no_]long_calls is in effect. */
7411 LONG
, /* #pragma long_calls is in effect. */
7412 SHORT
/* #pragma no_long_calls is in effect. */
7415 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
7418 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7420 arm_pragma_long_calls
= LONG
;
7424 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7426 arm_pragma_long_calls
= SHORT
;
7430 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7432 arm_pragma_long_calls
= OFF
;
7435 /* Handle an attribute requiring a FUNCTION_DECL;
7436 arguments as in struct attribute_spec.handler. */
7438 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
7439 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7441 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7443 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7445 *no_add_attrs
= true;
7451 /* Handle an "interrupt" or "isr" attribute;
7452 arguments as in struct attribute_spec.handler. */
7454 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
7459 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7461 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7463 *no_add_attrs
= true;
7465 else if (TARGET_VFP_BASE
)
7467 warning (OPT_Wattributes
, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7470 /* FIXME: the argument if any is checked for type attributes;
7471 should it be checked for decl ones? */
7475 if (FUNC_OR_METHOD_TYPE_P (*node
))
7477 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
7479 warning (OPT_Wattributes
, "%qE attribute ignored",
7481 *no_add_attrs
= true;
7484 else if (TREE_CODE (*node
) == POINTER_TYPE
7485 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node
))
7486 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
7488 *node
= build_variant_type_copy (*node
);
7489 TREE_TYPE (*node
) = build_type_attribute_variant
7491 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
7492 *no_add_attrs
= true;
7496 /* Possibly pass this attribute on from the type to a decl. */
7497 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
7498 | (int) ATTR_FLAG_FUNCTION_NEXT
7499 | (int) ATTR_FLAG_ARRAY_NEXT
))
7501 *no_add_attrs
= true;
7502 return tree_cons (name
, args
, NULL_TREE
);
7506 warning (OPT_Wattributes
, "%qE attribute ignored",
7515 /* Handle a "pcs" attribute; arguments as in struct
7516 attribute_spec.handler. */
7518 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
7519 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7521 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
7523 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
7524 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
7554 /* This function returns true if a function with declaration FNDECL and type
7555 FNTYPE uses the stack to pass arguments or return variables and false
7556 otherwise. This is used for functions with the attributes
7557 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7558 diagnostic messages if the stack is used. NAME is the name of the attribute
7562 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
7564 function_args_iterator args_iter
;
7565 CUMULATIVE_ARGS args_so_far_v
;
7566 cumulative_args_t args_so_far
;
7567 bool first_param
= true;
7568 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
7570 /* Error out if any argument is passed on the stack. */
7571 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
7572 args_so_far
= pack_cumulative_args (&args_so_far_v
);
7573 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
7577 prev_arg_type
= arg_type
;
7578 if (VOID_TYPE_P (arg_type
))
7581 function_arg_info
arg (arg_type
, /*named=*/true);
7583 /* ??? We should advance after processing the argument and pass
7584 the argument we're advancing past. */
7585 arm_function_arg_advance (args_so_far
, arg
);
7586 arg_rtx
= arm_function_arg (args_so_far
, arg
);
7587 if (!arg_rtx
|| arm_arg_partial_bytes (args_so_far
, arg
))
7589 error ("%qE attribute not available to functions with arguments "
7590 "passed on the stack", name
);
7593 first_param
= false;
7596 /* Error out for variadic functions since we cannot control how many
7597 arguments will be passed and thus stack could be used. stdarg_p () is not
7598 used for the checking to avoid browsing arguments twice. */
7599 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
7601 error ("%qE attribute not available to functions with variable number "
7602 "of arguments", name
);
7606 /* Error out if return value is passed on the stack. */
7607 ret_type
= TREE_TYPE (fntype
);
7608 if (arm_return_in_memory (ret_type
, fntype
))
7610 error ("%qE attribute not available to functions that return value on "
7617 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7618 function will check whether the attribute is allowed here and will add the
7619 attribute to the function declaration tree or otherwise issue a warning. */
7622 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
7631 *no_add_attrs
= true;
7632 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7637 /* Ignore attribute for function types. */
7638 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7640 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7642 *no_add_attrs
= true;
7648 /* Warn for static linkage functions. */
7649 if (!TREE_PUBLIC (fndecl
))
7651 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
7652 "with static linkage", name
);
7653 *no_add_attrs
= true;
7657 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
7658 TREE_TYPE (fndecl
));
7663 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7664 function will check whether the attribute is allowed here and will add the
7665 attribute to the function type tree or otherwise issue a diagnostic. The
7666 reason we check this at declaration time is to only allow the use of the
7667 attribute with declarations of function pointers and not function
7668 declarations. This function checks NODE is of the expected type and issues
7669 diagnostics otherwise using NAME. If it is not of the expected type
7670 *NO_ADD_ATTRS will be set to true. */
7673 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
7678 tree decl
= NULL_TREE
;
7683 *no_add_attrs
= true;
7684 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7691 fntype
= TREE_TYPE (*node
);
7693 if (VAR_P (*node
) || TREE_CODE (*node
) == TYPE_DECL
)
7699 while (fntype
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7700 fntype
= TREE_TYPE (fntype
);
7702 if ((DECL_P (*node
) && !decl
) || TREE_CODE (fntype
) != FUNCTION_TYPE
)
7704 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7705 "function pointer", name
);
7706 *no_add_attrs
= true;
7710 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7715 /* Prevent trees being shared among function types with and without
7716 cmse_nonsecure_call attribute. */
7719 type
= build_distinct_type_copy (TREE_TYPE (decl
));
7720 TREE_TYPE (decl
) = type
;
7724 type
= build_distinct_type_copy (*node
);
7730 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7733 fntype
= TREE_TYPE (fntype
);
7734 fntype
= build_distinct_type_copy (fntype
);
7735 TREE_TYPE (type
) = fntype
;
7738 /* Construct a type attribute and add it to the function type. */
7739 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7740 TYPE_ATTRIBUTES (fntype
));
7741 TYPE_ATTRIBUTES (fntype
) = attrs
;
7745 /* Return 0 if the attributes for two types are incompatible, 1 if they
7746 are compatible, and 2 if they are nearly compatible (which causes a
7747 warning to be generated). */
7749 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7753 tree attrs1
= lookup_attribute ("Advanced SIMD type",
7754 TYPE_ATTRIBUTES (type1
));
7755 tree attrs2
= lookup_attribute ("Advanced SIMD type",
7756 TYPE_ATTRIBUTES (type2
));
7757 if (bool (attrs1
) != bool (attrs2
))
7759 if (attrs1
&& !attribute_value_equal (attrs1
, attrs2
))
7762 /* Check for mismatch of non-default calling convention. */
7763 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7766 /* Check for mismatched call attributes. */
7767 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7768 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7769 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7770 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7772 /* Only bother to check if an attribute is defined. */
7773 if (l1
| l2
| s1
| s2
)
7775 /* If one type has an attribute, the other must have the same attribute. */
7776 if ((l1
!= l2
) || (s1
!= s2
))
7779 /* Disallow mixed attributes. */
7780 if ((l1
& s2
) || (l2
& s1
))
7784 /* Check for mismatched ISR attribute. */
7785 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7787 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7788 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7790 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7794 l1
= lookup_attribute ("cmse_nonsecure_call",
7795 TYPE_ATTRIBUTES (type1
)) != NULL
;
7796 l2
= lookup_attribute ("cmse_nonsecure_call",
7797 TYPE_ATTRIBUTES (type2
)) != NULL
;
7805 /* Assigns default attributes to newly defined type. This is used to
7806 set short_call/long_call attributes for function types of
7807 functions defined inside corresponding #pragma scopes. */
7809 arm_set_default_type_attributes (tree type
)
7811 /* Add __attribute__ ((long_call)) to all functions, when
7812 inside #pragma long_calls or __attribute__ ((short_call)),
7813 when inside #pragma no_long_calls. */
7814 if (FUNC_OR_METHOD_TYPE_P (type
))
7816 tree type_attr_list
, attr_name
;
7817 type_attr_list
= TYPE_ATTRIBUTES (type
);
7819 if (arm_pragma_long_calls
== LONG
)
7820 attr_name
= get_identifier ("long_call");
7821 else if (arm_pragma_long_calls
== SHORT
)
7822 attr_name
= get_identifier ("short_call");
7826 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7827 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7831 /* Return true if DECL is known to be linked into section SECTION. */
7834 arm_function_in_section_p (tree decl
, section
*section
)
7836 /* We can only be certain about the prevailing symbol definition. */
7837 if (!decl_binds_to_current_def_p (decl
))
7840 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7841 if (!DECL_SECTION_NAME (decl
))
7843 /* Make sure that we will not create a unique section for DECL. */
7844 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7848 return function_section (decl
) == section
;
7851 /* Return nonzero if a 32-bit "long_call" should be generated for
7852 a call from the current function to DECL. We generate a long_call
7855 a. has an __attribute__((long call))
7856 or b. is within the scope of a #pragma long_calls
7857 or c. the -mlong-calls command line switch has been specified
7859 However we do not generate a long call if the function:
7861 d. has an __attribute__ ((short_call))
7862 or e. is inside the scope of a #pragma no_long_calls
7863 or f. is defined in the same section as the current function. */
7866 arm_is_long_call_p (tree decl
)
7871 return TARGET_LONG_CALLS
;
7873 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7874 if (lookup_attribute ("short_call", attrs
))
7877 /* For "f", be conservative, and only cater for cases in which the
7878 whole of the current function is placed in the same section. */
7879 if (!flag_reorder_blocks_and_partition
7880 && TREE_CODE (decl
) == FUNCTION_DECL
7881 && arm_function_in_section_p (decl
, current_function_section ()))
7884 if (lookup_attribute ("long_call", attrs
))
7887 return TARGET_LONG_CALLS
;
7890 /* Return nonzero if it is ok to make a tail-call to DECL. */
7892 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7894 unsigned long func_type
;
7896 if (cfun
->machine
->sibcall_blocked
)
7901 /* In FDPIC, never tailcall something for which we have no decl:
7902 the target function could be in a different module, requiring
7903 a different FDPIC register value. */
7908 /* Never tailcall something if we are generating code for Thumb-1. */
7912 /* The PIC register is live on entry to VxWorks PLT entries, so we
7913 must make the call before restoring the PIC register. */
7914 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7917 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7918 may be used both as target of the call and base register for restoring
7919 the VFP registers */
7920 if (TARGET_APCS_FRAME
&& TARGET_ARM
7921 && TARGET_HARD_FLOAT
7922 && decl
&& arm_is_long_call_p (decl
))
7925 /* If we are interworking and the function is not declared static
7926 then we can't tail-call it unless we know that it exists in this
7927 compilation unit (since it might be a Thumb routine). */
7928 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7929 && !TREE_ASM_WRITTEN (decl
))
7932 func_type
= arm_current_func_type ();
7933 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7934 if (IS_INTERRUPT (func_type
))
7937 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7938 generated for entry functions themselves. */
7939 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7942 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7943 this would complicate matters for later code generation. */
7944 if (TREE_CODE (exp
) == CALL_EXPR
)
7946 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7947 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7951 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7953 /* Check that the return value locations are the same. For
7954 example that we aren't returning a value from the sibling in
7955 a VFP register but then need to transfer it to a core
7958 tree decl_or_type
= decl
;
7960 /* If it is an indirect function pointer, get the function type. */
7962 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7964 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7965 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7967 if (!rtx_equal_p (a
, b
))
7971 /* Never tailcall if function may be called with a misaligned SP. */
7972 if (IS_STACKALIGN (func_type
))
7975 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7976 references should become a NOP. Don't convert such calls into
7978 if (TARGET_AAPCS_BASED
7979 && arm_abi
== ARM_ABI_AAPCS
7981 && DECL_WEAK (decl
))
7984 /* We cannot do a tailcall for an indirect call by descriptor if all the
7985 argument registers are used because the only register left to load the
7986 address is IP and it will already contain the static chain. */
7987 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7989 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7990 CUMULATIVE_ARGS cum
;
7991 cumulative_args_t cum_v
;
7993 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7994 cum_v
= pack_cumulative_args (&cum
);
7996 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7998 tree type
= TREE_VALUE (t
);
7999 if (!VOID_TYPE_P (type
))
8001 function_arg_info
arg (type
, /*named=*/true);
8002 arm_function_arg_advance (cum_v
, arg
);
8006 function_arg_info
arg (integer_type_node
, /*named=*/true);
8007 if (!arm_function_arg (cum_v
, arg
))
8011 /* Everything else is ok. */
8016 /* Addressing mode support functions. */
8018 /* Return nonzero if X is a legitimate immediate operand when compiling
8019 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8021 legitimate_pic_operand_p (rtx x
)
8023 if (SYMBOL_REF_P (x
)
8024 || (GET_CODE (x
) == CONST
8025 && GET_CODE (XEXP (x
, 0)) == PLUS
8026 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
8032 /* Record that the current function needs a PIC register. If PIC_REG is null,
8033 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8034 both case cfun->machine->pic_reg is initialized if we have not already done
8035 so. COMPUTE_NOW decide whether and where to set the PIC register. If true,
8036 PIC register is reloaded in the current position of the instruction stream
8037 irregardless of whether it was loaded before. Otherwise, it is only loaded
8038 if not already done so (crtl->uses_pic_offset_table is null). Note that
8039 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8040 is only supported iff COMPUTE_NOW is false. */
8043 require_pic_register (rtx pic_reg
, bool compute_now
)
8045 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
8047 /* A lot of the logic here is made obscure by the fact that this
8048 routine gets called as part of the rtx cost estimation process.
8049 We don't want those calls to affect any assumptions about the real
8050 function; and further, we can't call entry_of_function() until we
8051 start the real expansion process. */
8052 if (!crtl
->uses_pic_offset_table
|| compute_now
)
8054 gcc_assert (can_create_pseudo_p ()
8055 || (pic_reg
!= NULL_RTX
8057 && GET_MODE (pic_reg
) == Pmode
));
8058 if (arm_pic_register
!= INVALID_REGNUM
8060 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
8062 if (!cfun
->machine
->pic_reg
)
8063 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
8065 /* Play games to avoid marking the function as needing pic
8066 if we are being called as part of the cost-estimation
8068 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
8069 crtl
->uses_pic_offset_table
= 1;
8073 rtx_insn
*seq
, *insn
;
8075 if (pic_reg
== NULL_RTX
)
8076 pic_reg
= gen_reg_rtx (Pmode
);
8077 if (!cfun
->machine
->pic_reg
)
8078 cfun
->machine
->pic_reg
= pic_reg
;
8080 /* Play games to avoid marking the function as needing pic
8081 if we are being called as part of the cost-estimation
8083 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
8085 crtl
->uses_pic_offset_table
= 1;
8088 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
8089 && arm_pic_register
> LAST_LO_REGNUM
8091 emit_move_insn (cfun
->machine
->pic_reg
,
8092 gen_rtx_REG (Pmode
, arm_pic_register
));
8094 arm_load_pic_register (0UL, pic_reg
);
8099 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
8101 INSN_LOCATION (insn
) = prologue_location
;
8103 /* We can be called during expansion of PHI nodes, where
8104 we can't yet emit instructions directly in the final
8105 insn stream. Queue the insns on the entry edge, they will
8106 be committed after everything else is expanded. */
8107 if (currently_expanding_to_rtl
)
8108 insert_insn_on_edge (seq
,
8110 (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
8118 /* Generate insns to calculate the address of ORIG in pic mode. */
8120 calculate_pic_address_constant (rtx reg
, rtx pic_reg
, rtx orig
)
8125 pat
= gen_calculate_pic_address (reg
, pic_reg
, orig
);
8127 /* Make the MEM as close to a constant as possible. */
8128 mem
= SET_SRC (pat
);
8129 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
8130 MEM_READONLY_P (mem
) = 1;
8131 MEM_NOTRAP_P (mem
) = 1;
8133 return emit_insn (pat
);
8136 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8137 created to hold the result of the load. If not NULL, PIC_REG indicates
8138 which register to use as PIC register, otherwise it is decided by register
8139 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8140 location in the instruction stream, irregardless of whether it was loaded
8141 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8142 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8144 Returns the register REG into which the PIC load is performed. */
8147 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
, rtx pic_reg
,
8150 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
8152 if (SYMBOL_REF_P (orig
)
8153 || LABEL_REF_P (orig
))
8157 gcc_assert (can_create_pseudo_p ());
8158 reg
= gen_reg_rtx (Pmode
);
8161 /* VxWorks does not impose a fixed gap between segments; the run-time
8162 gap can be different from the object-file gap. We therefore can't
8163 use GOTOFF unless we are absolutely sure that the symbol is in the
8164 same segment as the GOT. Unfortunately, the flexibility of linker
8165 scripts means that we can't be sure of that in general, so assume
8166 that GOTOFF is never valid on VxWorks. */
8167 /* References to weak symbols cannot be resolved locally: they
8168 may be overridden by a non-weak definition at link time. */
8170 if ((LABEL_REF_P (orig
)
8171 || (SYMBOL_REF_P (orig
)
8172 && SYMBOL_REF_LOCAL_P (orig
)
8173 && (SYMBOL_REF_DECL (orig
)
8174 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)
8175 && (!SYMBOL_REF_FUNCTION_P (orig
)
8176 || arm_fdpic_local_funcdesc_p (orig
))))
8178 && arm_pic_data_is_text_relative
)
8179 insn
= arm_pic_static_addr (orig
, reg
);
8182 /* If this function doesn't have a pic register, create one now. */
8183 require_pic_register (pic_reg
, compute_now
);
8185 if (pic_reg
== NULL_RTX
)
8186 pic_reg
= cfun
->machine
->pic_reg
;
8188 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8191 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8193 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
8197 else if (GET_CODE (orig
) == CONST
)
8201 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8202 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
8205 /* Handle the case where we have: const (UNSPEC_TLS). */
8206 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
8207 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
8210 /* Handle the case where we have:
8211 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8213 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8214 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
8215 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
8217 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
8223 gcc_assert (can_create_pseudo_p ());
8224 reg
= gen_reg_rtx (Pmode
);
8227 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
8229 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
,
8230 pic_reg
, compute_now
);
8231 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
8232 base
== reg
? 0 : reg
, pic_reg
,
8235 if (CONST_INT_P (offset
))
8237 /* The base register doesn't really matter, we only want to
8238 test the index for the appropriate mode. */
8239 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
8241 gcc_assert (can_create_pseudo_p ());
8242 offset
= force_reg (Pmode
, offset
);
8245 if (CONST_INT_P (offset
))
8246 return plus_constant (Pmode
, base
, INTVAL (offset
));
8249 if (GET_MODE_SIZE (mode
) > 4
8250 && (GET_MODE_CLASS (mode
) == MODE_INT
8251 || TARGET_SOFT_FLOAT
))
8253 emit_insn (gen_addsi3 (reg
, base
, offset
));
8257 return gen_rtx_PLUS (Pmode
, base
, offset
);
8264 /* Generate insns that produce the address of the stack canary */
8266 arm_stack_protect_tls_canary_mem (bool reload
)
8268 rtx tp
= gen_reg_rtx (SImode
);
8270 emit_insn (gen_reload_tp_hard (tp
));
8272 emit_insn (gen_load_tp_hard (tp
));
8274 rtx reg
= gen_reg_rtx (SImode
);
8275 rtx offset
= GEN_INT (arm_stack_protector_guard_offset
);
8276 emit_set_insn (reg
, gen_rtx_PLUS (SImode
, tp
, offset
));
8277 return gen_rtx_MEM (SImode
, reg
);
8281 /* Whether a register is callee saved or not. This is necessary because high
8282 registers are marked as caller saved when optimizing for size on Thumb-1
8283 targets despite being callee saved in order to avoid using them. */
8284 #define callee_saved_reg_p(reg) \
8285 (!call_used_or_fixed_reg_p (reg) \
8286 || (TARGET_THUMB1 && optimize_size \
8287 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8289 /* Return a mask for the call-clobbered low registers that are unused
8290 at the end of the prologue. */
8291 static unsigned long
8292 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8294 unsigned long mask
= 0;
8295 bitmap prologue_live_out
= df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
8297 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
8298 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (prologue_live_out
, reg
))
8299 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
8303 /* Similarly for the start of the epilogue. */
8304 static unsigned long
8305 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8307 unsigned long mask
= 0;
8308 bitmap epilogue_live_in
= df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun
));
8310 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
8311 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (epilogue_live_in
, reg
))
8312 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
8316 /* Find a spare register to use during the prolog of a function. */
8319 thumb_find_work_register (unsigned long pushed_regs_mask
)
8323 unsigned long unused_regs
8324 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8326 /* Check the argument registers first as these are call-used. The
8327 register allocation order means that sometimes r3 might be used
8328 but earlier argument registers might not, so check them all. */
8329 for (reg
= LAST_LO_REGNUM
; reg
>= FIRST_LO_REGNUM
; reg
--)
8330 if (unused_regs
& (1 << (reg
- FIRST_LO_REGNUM
)))
8333 /* Otherwise look for a call-saved register that is going to be pushed. */
8334 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
8335 if (pushed_regs_mask
& (1 << reg
))
8340 /* Thumb-2 can use high regs. */
8341 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
8342 if (pushed_regs_mask
& (1 << reg
))
8345 /* Something went wrong - thumb_compute_save_reg_mask()
8346 should have arranged for a suitable register to be pushed. */
8350 static GTY(()) int pic_labelno
;
8352 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8356 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
, rtx pic_reg
)
8358 rtx l1
, labelno
, pic_tmp
, pic_rtx
;
8360 if (crtl
->uses_pic_offset_table
== 0
8361 || TARGET_SINGLE_PIC_BASE
8365 gcc_assert (flag_pic
);
8367 if (pic_reg
== NULL_RTX
)
8368 pic_reg
= cfun
->machine
->pic_reg
;
8369 if (TARGET_VXWORKS_RTP
)
8371 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
8372 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8373 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
8375 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
8377 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8378 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
8382 /* We use an UNSPEC rather than a LABEL_REF because this label
8383 never appears in the code stream. */
8385 labelno
= GEN_INT (pic_labelno
++);
8386 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8387 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8389 /* On the ARM the PC register contains 'dot + 8' at the time of the
8390 addition, on the Thumb it is 'dot + 4'. */
8391 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8392 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
8394 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8398 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8400 else /* TARGET_THUMB1 */
8402 if (arm_pic_register
!= INVALID_REGNUM
8403 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
8405 /* We will have pushed the pic register, so we should always be
8406 able to find a work register. */
8407 pic_tmp
= gen_rtx_REG (SImode
,
8408 thumb_find_work_register (saved_regs
));
8409 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
8410 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
8411 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
8413 else if (arm_pic_register
!= INVALID_REGNUM
8414 && arm_pic_register
> LAST_LO_REGNUM
8415 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
8417 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8418 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
8419 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
8422 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8426 /* Need to emit this whether or not we obey regdecls,
8427 since setjmp/longjmp can cause life info to screw up. */
8431 /* Try to determine whether an object, referenced via ORIG, will be
8432 placed in the text or data segment. This is used in FDPIC mode, to
8433 decide which relocations to use when accessing ORIG. *IS_READONLY
8434 is set to true if ORIG is a read-only location, false otherwise.
8435 Return true if we could determine the location of ORIG, false
8436 otherwise. *IS_READONLY is valid only when we return true. */
8438 arm_is_segment_info_known (rtx orig
, bool *is_readonly
)
8440 *is_readonly
= false;
8442 if (LABEL_REF_P (orig
))
8444 *is_readonly
= true;
8448 if (SYMBOL_REF_P (orig
))
8450 if (CONSTANT_POOL_ADDRESS_P (orig
))
8452 *is_readonly
= true;
8455 if (SYMBOL_REF_LOCAL_P (orig
)
8456 && !SYMBOL_REF_EXTERNAL_P (orig
)
8457 && SYMBOL_REF_DECL (orig
)
8458 && (!DECL_P (SYMBOL_REF_DECL (orig
))
8459 || !DECL_COMMON (SYMBOL_REF_DECL (orig
))))
8461 tree decl
= SYMBOL_REF_DECL (orig
);
8462 tree init
= VAR_P (decl
)
8463 ? DECL_INITIAL (decl
) : (TREE_CODE (decl
) == CONSTRUCTOR
)
8466 bool named_section
, readonly
;
8468 if (init
&& init
!= error_mark_node
)
8469 reloc
= compute_reloc_for_constant (init
);
8471 named_section
= VAR_P (decl
)
8472 && lookup_attribute ("section", DECL_ATTRIBUTES (decl
));
8473 readonly
= decl_readonly_section (decl
, reloc
);
8475 /* We don't know where the link script will put a named
8476 section, so return false in such a case. */
8480 *is_readonly
= readonly
;
8484 /* We don't know. */
8491 /* Generate code to load the address of a static var when flag_pic is set. */
8493 arm_pic_static_addr (rtx orig
, rtx reg
)
8495 rtx l1
, labelno
, offset_rtx
;
8498 gcc_assert (flag_pic
);
8500 bool is_readonly
= false;
8501 bool info_known
= false;
8504 && SYMBOL_REF_P (orig
)
8505 && !SYMBOL_REF_FUNCTION_P (orig
))
8506 info_known
= arm_is_segment_info_known (orig
, &is_readonly
);
8509 && SYMBOL_REF_P (orig
)
8510 && !SYMBOL_REF_FUNCTION_P (orig
)
8513 /* We don't know where orig is stored, so we have be
8514 pessimistic and use a GOT relocation. */
8515 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8517 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8519 else if (TARGET_FDPIC
8520 && SYMBOL_REF_P (orig
)
8521 && (SYMBOL_REF_FUNCTION_P (orig
)
8524 /* We use the GOTOFF relocation. */
8525 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8527 rtx l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, orig
), UNSPEC_PIC_SYM
);
8528 emit_insn (gen_movsi (reg
, l1
));
8529 insn
= emit_insn (gen_addsi3 (reg
, reg
, pic_reg
));
8533 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8534 PC-relative access. */
8535 /* We use an UNSPEC rather than a LABEL_REF because this label
8536 never appears in the code stream. */
8537 labelno
= GEN_INT (pic_labelno
++);
8538 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8539 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8541 /* On the ARM the PC register contains 'dot + 8' at the time of the
8542 addition, on the Thumb it is 'dot + 4'. */
8543 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8544 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
8545 UNSPEC_SYMBOL_OFFSET
);
8546 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
8548 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
,
8555 /* Return nonzero if X is valid as an ARM state addressing register. */
8557 arm_address_register_rtx_p (rtx x
, int strict_p
)
8567 return ARM_REGNO_OK_FOR_BASE_P (regno
);
8569 return (regno
<= LAST_ARM_REGNUM
8570 || regno
>= FIRST_PSEUDO_REGISTER
8571 || regno
== FRAME_POINTER_REGNUM
8572 || regno
== ARG_POINTER_REGNUM
);
8575 /* Return TRUE if this rtx is the difference of a symbol and a label,
8576 and will reduce to a PC-relative relocation in the object file.
8577 Expressions like this can be left alone when generating PIC, rather
8578 than forced through the GOT. */
8580 pcrel_constant_p (rtx x
)
8582 if (GET_CODE (x
) == MINUS
)
8583 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
8588 /* Return true if X will surely end up in an index register after next
8591 will_be_in_index_register (const_rtx x
)
8593 /* arm.md: calculate_pic_address will split this into a register. */
8594 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
8597 /* Return nonzero if X is a valid ARM state address operand. */
8599 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
8603 enum rtx_code code
= GET_CODE (x
);
8605 if (arm_address_register_rtx_p (x
, strict_p
))
8608 use_ldrd
= (TARGET_LDRD
8609 && (mode
== DImode
|| mode
== DFmode
));
8611 if (code
== POST_INC
|| code
== PRE_DEC
8612 || ((code
== PRE_INC
|| code
== POST_DEC
)
8613 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8614 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8616 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8617 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8618 && GET_CODE (XEXP (x
, 1)) == PLUS
8619 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8621 rtx addend
= XEXP (XEXP (x
, 1), 1);
8623 /* Don't allow ldrd post increment by register because it's hard
8624 to fixup invalid register choices. */
8626 && GET_CODE (x
) == POST_MODIFY
8630 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
8631 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
8634 /* After reload constants split into minipools will have addresses
8635 from a LABEL_REF. */
8636 else if (reload_completed
8637 && (code
== LABEL_REF
8639 && GET_CODE (XEXP (x
, 0)) == PLUS
8640 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8641 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8644 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
8647 else if (code
== PLUS
)
8649 rtx xop0
= XEXP (x
, 0);
8650 rtx xop1
= XEXP (x
, 1);
8652 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8653 && ((CONST_INT_P (xop1
)
8654 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
8655 || (!strict_p
&& will_be_in_index_register (xop1
))))
8656 || (arm_address_register_rtx_p (xop1
, strict_p
)
8657 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
8661 /* Reload currently can't handle MINUS, so disable this for now */
8662 else if (GET_CODE (x
) == MINUS
)
8664 rtx xop0
= XEXP (x
, 0);
8665 rtx xop1
= XEXP (x
, 1);
8667 return (arm_address_register_rtx_p (xop0
, strict_p
)
8668 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
8672 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8673 && code
== SYMBOL_REF
8674 && CONSTANT_POOL_ADDRESS_P (x
)
8676 && symbol_mentioned_p (get_pool_constant (x
))
8677 && ! pcrel_constant_p (get_pool_constant (x
))))
8683 /* Return true if we can avoid creating a constant pool entry for x. */
8685 can_avoid_literal_pool_for_label_p (rtx x
)
8687 /* Normally we can assign constant values to target registers without
8688 the help of constant pool. But there are cases we have to use constant
8690 1) assign a label to register.
8691 2) sign-extend a 8bit value to 32bit and then assign to register.
8693 Constant pool access in format:
8694 (set (reg r0) (mem (symbol_ref (".LC0"))))
8695 will cause the use of literal pool (later in function arm_reorg).
8696 So here we mark such format as an invalid format, then the compiler
8697 will adjust it into:
8698 (set (reg r0) (symbol_ref (".LC0")))
8699 (set (reg r0) (mem (reg r0))).
8700 No extra register is required, and (mem (reg r0)) won't cause the use
8701 of literal pools. */
8702 if (arm_disable_literal_pool
&& SYMBOL_REF_P (x
)
8703 && CONSTANT_POOL_ADDRESS_P (x
))
8709 /* Return nonzero if X is a valid Thumb-2 address operand. */
8711 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8714 enum rtx_code code
= GET_CODE (x
);
8716 /* If we are dealing with a MVE predicate mode, then treat it as a HImode as
8717 can store and load it like any other 16-bit value. */
8718 if (TARGET_HAVE_MVE
&& VALID_MVE_PRED_MODE (mode
))
8721 if (TARGET_HAVE_MVE
&& VALID_MVE_MODE (mode
))
8722 return mve_vector_mem_operand (mode
, x
, strict_p
);
8724 if (arm_address_register_rtx_p (x
, strict_p
))
8727 use_ldrd
= (TARGET_LDRD
8728 && (mode
== DImode
|| mode
== DFmode
));
8730 if (code
== POST_INC
|| code
== PRE_DEC
8731 || ((code
== PRE_INC
|| code
== POST_DEC
)
8732 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8733 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8735 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8736 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8737 && GET_CODE (XEXP (x
, 1)) == PLUS
8738 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8740 /* Thumb-2 only has autoincrement by constant. */
8741 rtx addend
= XEXP (XEXP (x
, 1), 1);
8742 HOST_WIDE_INT offset
;
8744 if (!CONST_INT_P (addend
))
8747 offset
= INTVAL(addend
);
8748 if (GET_MODE_SIZE (mode
) <= 4)
8749 return (offset
> -256 && offset
< 256);
8751 return (use_ldrd
&& offset
> -1024 && offset
< 1024
8752 && (offset
& 3) == 0);
8755 /* After reload constants split into minipools will have addresses
8756 from a LABEL_REF. */
8757 else if (reload_completed
8758 && (code
== LABEL_REF
8760 && GET_CODE (XEXP (x
, 0)) == PLUS
8761 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8762 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8765 else if (mode
== TImode
8766 || (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
8767 || (TARGET_HAVE_MVE
&& VALID_MVE_STRUCT_MODE (mode
)))
8770 else if (code
== PLUS
)
8772 rtx xop0
= XEXP (x
, 0);
8773 rtx xop1
= XEXP (x
, 1);
8775 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8776 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
8777 || (!strict_p
&& will_be_in_index_register (xop1
))))
8778 || (arm_address_register_rtx_p (xop1
, strict_p
)
8779 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
8782 else if (can_avoid_literal_pool_for_label_p (x
))
8785 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8786 && code
== SYMBOL_REF
8787 && CONSTANT_POOL_ADDRESS_P (x
)
8789 && symbol_mentioned_p (get_pool_constant (x
))
8790 && ! pcrel_constant_p (get_pool_constant (x
))))
8796 /* Return nonzero if INDEX is valid for an address index operand in
8799 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
8802 HOST_WIDE_INT range
;
8803 enum rtx_code code
= GET_CODE (index
);
8805 /* Standard coprocessor addressing modes. */
8806 if (TARGET_HARD_FLOAT
8807 && (mode
== SFmode
|| mode
== DFmode
))
8808 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8809 && INTVAL (index
) > -1024
8810 && (INTVAL (index
) & 3) == 0);
8812 /* For quad modes, we restrict the constant offset to be slightly less
8813 than what the instruction format permits. We do this because for
8814 quad mode moves, we will actually decompose them into two separate
8815 double-mode reads or writes. INDEX must therefore be a valid
8816 (double-mode) offset and so should INDEX+8. */
8817 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8818 return (code
== CONST_INT
8819 && INTVAL (index
) < 1016
8820 && INTVAL (index
) > -1024
8821 && (INTVAL (index
) & 3) == 0);
8823 /* We have no such constraint on double mode offsets, so we permit the
8824 full range of the instruction format. */
8825 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8826 return (code
== CONST_INT
8827 && INTVAL (index
) < 1024
8828 && INTVAL (index
) > -1024
8829 && (INTVAL (index
) & 3) == 0);
8831 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8832 return (code
== CONST_INT
8833 && INTVAL (index
) < 1024
8834 && INTVAL (index
) > -1024
8835 && (INTVAL (index
) & 3) == 0);
8837 if (arm_address_register_rtx_p (index
, strict_p
)
8838 && (GET_MODE_SIZE (mode
) <= 4))
8841 if (mode
== DImode
|| mode
== DFmode
)
8843 if (code
== CONST_INT
)
8845 HOST_WIDE_INT val
= INTVAL (index
);
8847 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8848 If vldr is selected it uses arm_coproc_mem_operand. */
8850 return val
> -256 && val
< 256;
8852 return val
> -4096 && val
< 4092;
8855 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
8858 if (GET_MODE_SIZE (mode
) <= 4
8862 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
8866 rtx xiop0
= XEXP (index
, 0);
8867 rtx xiop1
= XEXP (index
, 1);
8869 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8870 && power_of_two_operand (xiop1
, SImode
))
8871 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8872 && power_of_two_operand (xiop0
, SImode
)));
8874 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
8875 || code
== ASHIFT
|| code
== ROTATERT
)
8877 rtx op
= XEXP (index
, 1);
8879 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8882 && INTVAL (op
) <= 31);
8886 /* For ARM v4 we may be doing a sign-extend operation during the
8892 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
8898 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8900 return (code
== CONST_INT
8901 && INTVAL (index
) < range
8902 && INTVAL (index
) > -range
);
8905 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8906 index operand. i.e. 1, 2, 4 or 8. */
8908 thumb2_index_mul_operand (rtx op
)
8912 if (!CONST_INT_P (op
))
8916 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
8919 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8921 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8923 enum rtx_code code
= GET_CODE (index
);
8925 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8926 /* Standard coprocessor addressing modes. */
8928 && (mode
== SFmode
|| mode
== DFmode
))
8929 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8930 /* Thumb-2 allows only > -256 index range for it's core register
8931 load/stores. Since we allow SF/DF in core registers, we have
8932 to use the intersection between -256~4096 (core) and -1024~1024
8934 && INTVAL (index
) > -256
8935 && (INTVAL (index
) & 3) == 0);
8937 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8939 /* For DImode assume values will usually live in core regs
8940 and only allow LDRD addressing modes. */
8941 if (!TARGET_LDRD
|| mode
!= DImode
)
8942 return (code
== CONST_INT
8943 && INTVAL (index
) < 1024
8944 && INTVAL (index
) > -1024
8945 && (INTVAL (index
) & 3) == 0);
8948 /* For quad modes, we restrict the constant offset to be slightly less
8949 than what the instruction format permits. We do this because for
8950 quad mode moves, we will actually decompose them into two separate
8951 double-mode reads or writes. INDEX must therefore be a valid
8952 (double-mode) offset and so should INDEX+8. */
8953 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8954 return (code
== CONST_INT
8955 && INTVAL (index
) < 1016
8956 && INTVAL (index
) > -1024
8957 && (INTVAL (index
) & 3) == 0);
8959 /* We have no such constraint on double mode offsets, so we permit the
8960 full range of the instruction format. */
8961 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8962 return (code
== CONST_INT
8963 && INTVAL (index
) < 1024
8964 && INTVAL (index
) > -1024
8965 && (INTVAL (index
) & 3) == 0);
8967 if (arm_address_register_rtx_p (index
, strict_p
)
8968 && (GET_MODE_SIZE (mode
) <= 4))
8971 if (mode
== DImode
|| mode
== DFmode
)
8973 if (code
== CONST_INT
)
8975 HOST_WIDE_INT val
= INTVAL (index
);
8976 /* Thumb-2 ldrd only has reg+const addressing modes.
8977 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8978 If vldr is selected it uses arm_coproc_mem_operand. */
8980 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8982 return IN_RANGE (val
, -255, 4095 - 4);
8990 rtx xiop0
= XEXP (index
, 0);
8991 rtx xiop1
= XEXP (index
, 1);
8993 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8994 && thumb2_index_mul_operand (xiop1
))
8995 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8996 && thumb2_index_mul_operand (xiop0
)));
8998 else if (code
== ASHIFT
)
9000 rtx op
= XEXP (index
, 1);
9002 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
9005 && INTVAL (op
) <= 3);
9008 return (code
== CONST_INT
9009 && INTVAL (index
) < 4096
9010 && INTVAL (index
) > -256);
9013 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9015 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
9025 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
9027 return (regno
<= LAST_LO_REGNUM
9028 || regno
> LAST_VIRTUAL_REGISTER
9029 || regno
== FRAME_POINTER_REGNUM
9030 || (GET_MODE_SIZE (mode
) >= 4
9031 && (regno
== STACK_POINTER_REGNUM
9032 || regno
>= FIRST_PSEUDO_REGISTER
9033 || x
== hard_frame_pointer_rtx
9034 || x
== arg_pointer_rtx
)));
9037 /* Return nonzero if x is a legitimate index register. This is the case
9038 for any base register that can access a QImode object. */
9040 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
9042 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
9045 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9047 The AP may be eliminated to either the SP or the FP, so we use the
9048 least common denominator, e.g. SImode, and offsets from 0 to 64.
9050 ??? Verify whether the above is the right approach.
9052 ??? Also, the FP may be eliminated to the SP, so perhaps that
9053 needs special handling also.
9055 ??? Look at how the mips16 port solves this problem. It probably uses
9056 better ways to solve some of these problems.
9058 Although it is not incorrect, we don't accept QImode and HImode
9059 addresses based on the frame pointer or arg pointer until the
9060 reload pass starts. This is so that eliminating such addresses
9061 into stack based ones won't produce impossible code. */
9063 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
9065 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
9068 /* ??? Not clear if this is right. Experiment. */
9069 if (GET_MODE_SIZE (mode
) < 4
9070 && !(reload_in_progress
|| reload_completed
)
9071 && (reg_mentioned_p (frame_pointer_rtx
, x
)
9072 || reg_mentioned_p (arg_pointer_rtx
, x
)
9073 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
9074 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
9075 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
9076 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
9079 /* Accept any base register. SP only in SImode or larger. */
9080 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
9083 /* This is PC relative data before arm_reorg runs. */
9084 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
9086 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
9087 && !arm_disable_literal_pool
)
9090 /* This is PC relative data after arm_reorg runs. */
9091 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
9094 || (GET_CODE (x
) == CONST
9095 && GET_CODE (XEXP (x
, 0)) == PLUS
9096 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
9097 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
9100 /* Post-inc indexing only supported for SImode and larger. */
9101 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
9102 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
9105 else if (GET_CODE (x
) == PLUS
)
9107 /* REG+REG address can be any two index registers. */
9108 /* We disallow FRAME+REG addressing since we know that FRAME
9109 will be replaced with STACK, and SP relative addressing only
9110 permits SP+OFFSET. */
9111 if (GET_MODE_SIZE (mode
) <= 4
9112 && XEXP (x
, 0) != frame_pointer_rtx
9113 && XEXP (x
, 1) != frame_pointer_rtx
9114 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9115 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
9116 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
9119 /* REG+const has 5-7 bit offset for non-SP registers. */
9120 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9121 || XEXP (x
, 0) == arg_pointer_rtx
)
9122 && CONST_INT_P (XEXP (x
, 1))
9123 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
9126 /* REG+const has 10-bit offset for SP, but only SImode and
9127 larger is supported. */
9128 /* ??? Should probably check for DI/DFmode overflow here
9129 just like GO_IF_LEGITIMATE_OFFSET does. */
9130 else if (REG_P (XEXP (x
, 0))
9131 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
9132 && GET_MODE_SIZE (mode
) >= 4
9133 && CONST_INT_P (XEXP (x
, 1))
9134 && INTVAL (XEXP (x
, 1)) >= 0
9135 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
9136 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9139 else if (REG_P (XEXP (x
, 0))
9140 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
9141 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
9142 || VIRTUAL_REGISTER_P (XEXP (x
, 0)))
9143 && GET_MODE_SIZE (mode
) >= 4
9144 && CONST_INT_P (XEXP (x
, 1))
9145 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9149 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
9150 && GET_MODE_SIZE (mode
) == 4
9152 && CONSTANT_POOL_ADDRESS_P (x
)
9153 && !arm_disable_literal_pool
9155 && symbol_mentioned_p (get_pool_constant (x
))
9156 && ! pcrel_constant_p (get_pool_constant (x
))))
9162 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9163 instruction of mode MODE. */
9165 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
9167 switch (GET_MODE_SIZE (mode
))
9170 return val
>= 0 && val
< 32;
9173 return val
>= 0 && val
< 64 && (val
& 1) == 0;
9177 && (val
+ GET_MODE_SIZE (mode
)) <= 128
9183 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
, code_helper
)
9186 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
9187 else if (TARGET_THUMB2
)
9188 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
9189 else /* if (TARGET_THUMB1) */
9190 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
9193 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9195 Given an rtx X being reloaded into a reg required to be
9196 in class CLASS, return the class of reg to actually use.
9197 In general this is just CLASS, but for the Thumb core registers and
9198 immediate constants we prefer a LO_REGS class or a subset. */
9201 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
9207 if (rclass
== GENERAL_REGS
)
9214 /* Build the SYMBOL_REF for __tls_get_addr. */
9216 static GTY(()) rtx tls_get_addr_libfunc
;
9219 get_tls_get_addr (void)
9221 if (!tls_get_addr_libfunc
)
9222 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
9223 return tls_get_addr_libfunc
;
9227 arm_load_tp (rtx target
)
9230 target
= gen_reg_rtx (SImode
);
9234 /* Can return in any reg. */
9235 emit_insn (gen_load_tp_hard (target
));
9239 /* Always returned in r0. Immediately copy the result into a pseudo,
9240 otherwise other uses of r0 (e.g. setting up function arguments) may
9241 clobber the value. */
9247 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
9248 rtx initial_fdpic_reg
= get_hard_reg_initial_val (Pmode
, FDPIC_REGNUM
);
9250 emit_insn (gen_load_tp_soft_fdpic ());
9253 emit_insn (gen_restore_pic_register_after_call(fdpic_reg
, initial_fdpic_reg
));
9256 emit_insn (gen_load_tp_soft ());
9258 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
9259 emit_move_insn (target
, tmp
);
9265 load_tls_operand (rtx x
, rtx reg
)
9269 if (reg
== NULL_RTX
)
9270 reg
= gen_reg_rtx (SImode
);
9272 tmp
= gen_rtx_CONST (SImode
, x
);
9274 emit_move_insn (reg
, tmp
);
9280 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
9282 rtx label
, labelno
= NULL_RTX
, sum
;
9284 gcc_assert (reloc
!= TLS_DESCSEQ
);
9289 sum
= gen_rtx_UNSPEC (Pmode
,
9290 gen_rtvec (2, x
, GEN_INT (reloc
)),
9295 labelno
= GEN_INT (pic_labelno
++);
9296 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9297 label
= gen_rtx_CONST (VOIDmode
, label
);
9299 sum
= gen_rtx_UNSPEC (Pmode
,
9300 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
9301 GEN_INT (TARGET_ARM
? 8 : 4)),
9304 reg
= load_tls_operand (sum
, reg
);
9307 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9308 else if (TARGET_ARM
)
9309 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
9311 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9313 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
9314 LCT_PURE
, /* LCT_CONST? */
9317 rtx_insn
*insns
= get_insns ();
9324 arm_tls_descseq_addr (rtx x
, rtx reg
)
9326 rtx labelno
= GEN_INT (pic_labelno
++);
9327 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9328 rtx sum
= gen_rtx_UNSPEC (Pmode
,
9329 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
9330 gen_rtx_CONST (VOIDmode
, label
),
9331 GEN_INT (!TARGET_ARM
)),
9333 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
9335 emit_insn (gen_tlscall (x
, labelno
));
9337 reg
= gen_reg_rtx (SImode
);
9339 gcc_assert (REGNO (reg
) != R0_REGNUM
);
9341 emit_move_insn (reg
, reg0
);
9348 legitimize_tls_address (rtx x
, rtx reg
)
9350 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
9352 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
9356 case TLS_MODEL_GLOBAL_DYNAMIC
:
9357 if (TARGET_GNU2_TLS
)
9359 gcc_assert (!TARGET_FDPIC
);
9361 reg
= arm_tls_descseq_addr (x
, reg
);
9363 tp
= arm_load_tp (NULL_RTX
);
9365 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9369 /* Original scheme */
9371 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32_FDPIC
);
9373 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
9374 dest
= gen_reg_rtx (Pmode
);
9375 emit_libcall_block (insns
, dest
, ret
, x
);
9379 case TLS_MODEL_LOCAL_DYNAMIC
:
9380 if (TARGET_GNU2_TLS
)
9382 gcc_assert (!TARGET_FDPIC
);
9384 reg
= arm_tls_descseq_addr (x
, reg
);
9386 tp
= arm_load_tp (NULL_RTX
);
9388 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9393 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32_FDPIC
);
9395 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
9397 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9398 share the LDM result with other LD model accesses. */
9399 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
9401 dest
= gen_reg_rtx (Pmode
);
9402 emit_libcall_block (insns
, dest
, ret
, eqv
);
9404 /* Load the addend. */
9405 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
9406 GEN_INT (TLS_LDO32
)),
9408 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
9409 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
9413 case TLS_MODEL_INITIAL_EXEC
:
9416 sum
= gen_rtx_UNSPEC (Pmode
,
9417 gen_rtvec (2, x
, GEN_INT (TLS_IE32_FDPIC
)),
9419 reg
= load_tls_operand (sum
, reg
);
9420 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9421 emit_move_insn (reg
, gen_rtx_MEM (Pmode
, reg
));
9425 labelno
= GEN_INT (pic_labelno
++);
9426 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9427 label
= gen_rtx_CONST (VOIDmode
, label
);
9428 sum
= gen_rtx_UNSPEC (Pmode
,
9429 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
9430 GEN_INT (TARGET_ARM
? 8 : 4)),
9432 reg
= load_tls_operand (sum
, reg
);
9435 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
9436 else if (TARGET_THUMB2
)
9437 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
9440 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9441 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
9445 tp
= arm_load_tp (NULL_RTX
);
9447 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9449 case TLS_MODEL_LOCAL_EXEC
:
9450 tp
= arm_load_tp (NULL_RTX
);
9452 reg
= gen_rtx_UNSPEC (Pmode
,
9453 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
9455 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
9457 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9464 /* Try machine-dependent ways of modifying an illegitimate address
9465 to be legitimate. If we find one, return the new, valid address. */
9467 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9469 if (arm_tls_referenced_p (x
))
9473 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
9475 addend
= XEXP (XEXP (x
, 0), 1);
9476 x
= XEXP (XEXP (x
, 0), 0);
9479 if (!SYMBOL_REF_P (x
))
9482 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
9484 x
= legitimize_tls_address (x
, NULL_RTX
);
9488 x
= gen_rtx_PLUS (SImode
, x
, addend
);
9496 return thumb_legitimize_address (x
, orig_x
, mode
);
9498 if (GET_CODE (x
) == PLUS
)
9500 rtx xop0
= XEXP (x
, 0);
9501 rtx xop1
= XEXP (x
, 1);
9503 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
9504 xop0
= force_reg (SImode
, xop0
);
9506 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
9507 && !symbol_mentioned_p (xop1
))
9508 xop1
= force_reg (SImode
, xop1
);
9510 if (ARM_BASE_REGISTER_RTX_P (xop0
)
9511 && CONST_INT_P (xop1
))
9513 HOST_WIDE_INT n
, low_n
;
9517 /* VFP addressing modes actually allow greater offsets, but for
9518 now we just stick with the lowest common denominator. */
9519 if (mode
== DImode
|| mode
== DFmode
)
9531 low_n
= ((mode
) == TImode
? 0
9532 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
9536 base_reg
= gen_reg_rtx (SImode
);
9537 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
9538 emit_move_insn (base_reg
, val
);
9539 x
= plus_constant (Pmode
, base_reg
, low_n
);
9541 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9542 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9545 /* XXX We don't allow MINUS any more -- see comment in
9546 arm_legitimate_address_outer_p (). */
9547 else if (GET_CODE (x
) == MINUS
)
9549 rtx xop0
= XEXP (x
, 0);
9550 rtx xop1
= XEXP (x
, 1);
9552 if (CONSTANT_P (xop0
))
9553 xop0
= force_reg (SImode
, xop0
);
9555 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
9556 xop1
= force_reg (SImode
, xop1
);
9558 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9559 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
9562 /* Make sure to take full advantage of the pre-indexed addressing mode
9563 with absolute addresses which often allows for the base register to
9564 be factorized for multiple adjacent memory references, and it might
9565 even allows for the mini pool to be avoided entirely. */
9566 else if (CONST_INT_P (x
) && optimize
> 0)
9569 HOST_WIDE_INT mask
, base
, index
;
9572 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9573 only use a 8-bit index. So let's use a 12-bit index for
9574 SImode only and hope that arm_gen_constant will enable LDRB
9575 to use more bits. */
9576 bits
= (mode
== SImode
) ? 12 : 8;
9577 mask
= (1 << bits
) - 1;
9578 base
= INTVAL (x
) & ~mask
;
9579 index
= INTVAL (x
) & mask
;
9580 if (TARGET_ARM
&& bit_count (base
& 0xffffffff) > (32 - bits
)/2)
9582 /* It'll most probably be more efficient to generate the
9583 base with more bits set and use a negative index instead.
9584 Don't do this for Thumb as negative offsets are much more
9589 base_reg
= force_reg (SImode
, GEN_INT (base
));
9590 x
= plus_constant (Pmode
, base_reg
, index
);
9595 /* We need to find and carefully transform any SYMBOL and LABEL
9596 references; so go back to the original address expression. */
9597 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9598 false /*compute_now*/);
9600 if (new_x
!= orig_x
)
9608 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9609 to be legitimate. If we find one, return the new, valid address. */
9611 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9613 if (GET_CODE (x
) == PLUS
9614 && CONST_INT_P (XEXP (x
, 1))
9615 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
9616 || INTVAL (XEXP (x
, 1)) < 0))
9618 rtx xop0
= XEXP (x
, 0);
9619 rtx xop1
= XEXP (x
, 1);
9620 HOST_WIDE_INT offset
= INTVAL (xop1
);
9622 /* Try and fold the offset into a biasing of the base register and
9623 then offsetting that. Don't do this when optimizing for space
9624 since it can cause too many CSEs. */
9625 if (optimize_size
&& offset
>= 0
9626 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
9628 HOST_WIDE_INT delta
;
9631 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
9632 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
9633 delta
= 31 * GET_MODE_SIZE (mode
);
9635 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
9637 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
9639 x
= plus_constant (Pmode
, xop0
, delta
);
9641 else if (offset
< 0 && offset
> -256)
9642 /* Small negative offsets are best done with a subtract before the
9643 dereference, forcing these into a register normally takes two
9645 x
= force_operand (x
, NULL_RTX
);
9648 /* For the remaining cases, force the constant into a register. */
9649 xop1
= force_reg (SImode
, xop1
);
9650 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9653 else if (GET_CODE (x
) == PLUS
9654 && s_register_operand (XEXP (x
, 1), SImode
)
9655 && !s_register_operand (XEXP (x
, 0), SImode
))
9657 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
9659 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
9664 /* We need to find and carefully transform any SYMBOL and LABEL
9665 references; so go back to the original address expression. */
9666 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9667 false /*compute_now*/);
9669 if (new_x
!= orig_x
)
9676 /* Return TRUE if X contains any TLS symbol references. */
9679 arm_tls_referenced_p (rtx x
)
9681 if (! TARGET_HAVE_TLS
)
9684 subrtx_iterator::array_type array
;
9685 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
9687 const_rtx x
= *iter
;
9688 if (SYMBOL_REF_P (x
) && SYMBOL_REF_TLS_MODEL (x
) != 0)
9690 /* ARM currently does not provide relocations to encode TLS variables
9691 into AArch32 instructions, only data, so there is no way to
9692 currently implement these if a literal pool is disabled. */
9693 if (arm_disable_literal_pool
)
9694 sorry ("accessing thread-local storage is not currently supported "
9695 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9700 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9701 TLS offsets, not real symbol references. */
9702 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9703 iter
.skip_subrtxes ();
9708 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9710 On the ARM, allow any integer (invalid ones are removed later by insn
9711 patterns), nice doubles and symbol_refs which refer to the function's
9714 When generating pic allow anything. */
9717 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
9719 if (GET_CODE (x
) == CONST_VECTOR
&& !neon_make_constant (x
, false))
9722 return flag_pic
|| !label_mentioned_p (x
);
9726 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9728 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9729 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9730 for ARMv8-M Baseline or later the result is valid. */
9731 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
9734 return (CONST_INT_P (x
)
9735 || CONST_DOUBLE_P (x
)
9736 || CONSTANT_ADDRESS_P (x
)
9737 || (TARGET_HAVE_MOVT
&& SYMBOL_REF_P (x
))
9738 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9739 we build the symbol address with upper/lower
9742 && !label_mentioned_p (x
)
9743 && arm_valid_symbolic_address_p (x
)
9744 && arm_disable_literal_pool
)
9749 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
9751 return (!arm_cannot_force_const_mem (mode
, x
)
9753 ? arm_legitimate_constant_p_1 (mode
, x
)
9754 : thumb_legitimate_constant_p (mode
, x
)));
9757 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9760 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9763 split_const (x
, &base
, &offset
);
9765 if (SYMBOL_REF_P (base
))
9767 /* Function symbols cannot have an offset due to the Thumb bit. */
9768 if ((SYMBOL_REF_FLAGS (base
) & SYMBOL_FLAG_FUNCTION
)
9769 && INTVAL (offset
) != 0)
9772 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9773 && !offset_within_block_p (base
, INTVAL (offset
)))
9776 return arm_tls_referenced_p (x
);
9779 #define REG_OR_SUBREG_REG(X) \
9781 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9783 #define REG_OR_SUBREG_RTX(X) \
9784 (REG_P (X) ? (X) : SUBREG_REG (X))
9787 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9789 machine_mode mode
= GET_MODE (x
);
9798 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9805 return COSTS_N_INSNS (1);
9808 if (arm_arch6m
&& arm_m_profile_small_mul
)
9809 return COSTS_N_INSNS (32);
9811 if (CONST_INT_P (XEXP (x
, 1)))
9814 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
9821 return COSTS_N_INSNS (2) + cycles
;
9823 return COSTS_N_INSNS (1) + 16;
9826 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9828 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9829 return (COSTS_N_INSNS (words
)
9830 + 4 * ((MEM_P (SET_SRC (x
)))
9831 + MEM_P (SET_DEST (x
))));
9836 if (UINTVAL (x
) < 256
9837 /* 16-bit constant. */
9838 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
9840 if (thumb_shiftable_const (INTVAL (x
)))
9841 return COSTS_N_INSNS (2);
9842 return arm_disable_literal_pool
9844 : COSTS_N_INSNS (3);
9846 else if ((outer
== PLUS
|| outer
== COMPARE
)
9847 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9849 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9850 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9851 return COSTS_N_INSNS (1);
9852 else if (outer
== AND
)
9855 /* This duplicates the tests in the andsi3 expander. */
9856 for (i
= 9; i
<= 31; i
++)
9857 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9858 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9859 return COSTS_N_INSNS (2);
9861 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9862 || outer
== LSHIFTRT
)
9864 return COSTS_N_INSNS (2);
9870 return COSTS_N_INSNS (3);
9888 /* XXX another guess. */
9889 /* Memory costs quite a lot for the first word, but subsequent words
9890 load at the equivalent of a single insn each. */
9891 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9892 + ((SYMBOL_REF_P (x
) && CONSTANT_POOL_ADDRESS_P (x
))
9897 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9903 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
9904 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
9910 return total
+ COSTS_N_INSNS (1);
9912 /* Assume a two-shift sequence. Increase the cost slightly so
9913 we prefer actual shifts over an extend operation. */
9914 return total
+ 1 + COSTS_N_INSNS (2);
9921 /* Estimates the size cost of thumb1 instructions.
9922 For now most of the code is copied from thumb1_rtx_costs. We need more
9923 fine grain tuning when we have more related test cases. */
9925 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9927 machine_mode mode
= GET_MODE (x
);
9936 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9940 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9941 defined by RTL expansion, especially for the expansion of
9943 if ((GET_CODE (XEXP (x
, 0)) == MULT
9944 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9945 || (GET_CODE (XEXP (x
, 1)) == MULT
9946 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9947 return COSTS_N_INSNS (2);
9952 return COSTS_N_INSNS (1);
9955 if (CONST_INT_P (XEXP (x
, 1)))
9957 /* Thumb1 mul instruction can't operate on const. We must Load it
9958 into a register first. */
9959 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9960 /* For the targets which have a very small and high-latency multiply
9961 unit, we prefer to synthesize the mult with up to 5 instructions,
9962 giving a good balance between size and performance. */
9963 if (arm_arch6m
&& arm_m_profile_small_mul
)
9964 return COSTS_N_INSNS (5);
9966 return COSTS_N_INSNS (1) + const_size
;
9968 return COSTS_N_INSNS (1);
9971 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9973 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9974 cost
= COSTS_N_INSNS (words
);
9975 if (satisfies_constraint_J (SET_SRC (x
))
9976 || satisfies_constraint_K (SET_SRC (x
))
9977 /* Too big an immediate for a 2-byte mov, using MOVT. */
9978 || (CONST_INT_P (SET_SRC (x
))
9979 && UINTVAL (SET_SRC (x
)) >= 256
9981 && satisfies_constraint_j (SET_SRC (x
)))
9982 /* thumb1_movdi_insn. */
9983 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9984 cost
+= COSTS_N_INSNS (1);
9990 if (UINTVAL (x
) < 256)
9991 return COSTS_N_INSNS (1);
9992 /* movw is 4byte long. */
9993 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9994 return COSTS_N_INSNS (2);
9995 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9996 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9997 return COSTS_N_INSNS (2);
9998 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9999 if (thumb_shiftable_const (INTVAL (x
)))
10000 return COSTS_N_INSNS (2);
10001 return arm_disable_literal_pool
10002 ? COSTS_N_INSNS (8)
10003 : COSTS_N_INSNS (3);
10005 else if ((outer
== PLUS
|| outer
== COMPARE
)
10006 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
10008 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
10009 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
10010 return COSTS_N_INSNS (1);
10011 else if (outer
== AND
)
10014 /* This duplicates the tests in the andsi3 expander. */
10015 for (i
= 9; i
<= 31; i
++)
10016 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
10017 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
10018 return COSTS_N_INSNS (2);
10020 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
10021 || outer
== LSHIFTRT
)
10023 return COSTS_N_INSNS (2);
10029 return COSTS_N_INSNS (3);
10043 return COSTS_N_INSNS (1);
10046 return (COSTS_N_INSNS (1)
10047 + COSTS_N_INSNS (1)
10048 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
10049 + ((SYMBOL_REF_P (x
) && CONSTANT_POOL_ADDRESS_P (x
))
10050 ? COSTS_N_INSNS (1) : 0));
10054 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10059 /* XXX still guessing. */
10060 switch (GET_MODE (XEXP (x
, 0)))
10063 return (1 + (mode
== DImode
? 4 : 0)
10064 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
10067 return (4 + (mode
== DImode
? 4 : 0)
10068 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
10071 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
10082 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10083 PLUS, adds the carry flag, then return the other operand. If
10084 neither is a carry, return OP unchanged. */
10086 strip_carry_operation (rtx op
)
10088 gcc_assert (GET_CODE (op
) == PLUS
);
10089 if (arm_carry_operation (XEXP (op
, 0), GET_MODE (op
)))
10090 return XEXP (op
, 1);
10091 else if (arm_carry_operation (XEXP (op
, 1), GET_MODE (op
)))
10092 return XEXP (op
, 0);
10096 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10097 operand, then return the operand that is being shifted. If the shift
10098 is not by a constant, then set SHIFT_REG to point to the operand.
10099 Return NULL if OP is not a shifter operand. */
10101 shifter_op_p (rtx op
, rtx
*shift_reg
)
10103 enum rtx_code code
= GET_CODE (op
);
10105 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
10106 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
10107 return XEXP (op
, 0);
10108 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
10109 return XEXP (op
, 0);
10110 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
10111 || code
== ASHIFTRT
)
10113 if (!CONST_INT_P (XEXP (op
, 1)))
10114 *shift_reg
= XEXP (op
, 1);
10115 return XEXP (op
, 0);
/* Compute the cost of an UNSPEC or UNSPEC_VOLATILE rtx X into *COST.
   SPEED_P selects performance (true) versus size costing; the outer
   code parameter is deliberately unused.  */
10122 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
/* Extra per-insn cost table for the currently selected tuning.  */
10124 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
10125 rtx_code code
= GET_CODE (x
);
/* Only UNSPEC-like rtxes may be passed here.  */
10126 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
/* Dispatch on the specific unspec number.  */
10128 switch (XINT (x
, 1))
10130 case UNSPEC_UNALIGNED_LOAD
:
10131 /* We can only do unaligned loads into the integer unit, and we can't
10132 use LDM or LDRD. */
/* One instruction per word loaded.  */
10133 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
/* Add the per-word load cost plus the unaligned-access penalty from the
   tuning table.  */
10135 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
10136 + extra_cost
->ldst
.load_unaligned
);
/* Also cost the address computation for the accessed location.  */
10139 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
10140 ADDR_SPACE_GENERIC
, speed_p
);
/* Unaligned stores mirror the load case: one insn per word plus store
   and unaligned penalties.  */
10144 case UNSPEC_UNALIGNED_STORE
:
10145 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
10147 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
10148 + extra_cost
->ldst
.store_unaligned
);
/* A store must additionally cost the value being stored.  */
10150 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
10152 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
10153 ADDR_SPACE_GENERIC
, speed_p
);
/* FP round-to-integral variants (VRINT*): add the FP rounding cost for
   the operand width (index selects DFmode vs. SFmode table entry).  */
10157 case UNSPEC_VRINTZ
:
10158 case UNSPEC_VRINTP
:
10159 case UNSPEC_VRINTM
:
10160 case UNSPEC_VRINTR
:
10161 case UNSPEC_VRINTX
:
10162 case UNSPEC_VRINTA
:
10164 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
/* Any other unspec: assume a flat two-instruction cost.  */
10168 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.
   N is the number of arguments; it is parenthesized in the expansion so
   that a non-trivial argument expression cannot change the grouping.  */
#define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
/* Costing helper for narrow (sub-word) arithmetic OP: if operand IDX of
   the binary rtx X is a valid shifter operand (see shifter_op_p) that is
   a left shift, cost it as a combined arith+shift operation -- using the
   register-shift cost when shifter_op_p recorded a register amount in
   SHIFT_REG -- plus the cost of the shifted operand and of the other
   operand of X.
   NOTE(review): the macro's closing lines are not visible in this
   extract; confirm its tail against the full source before editing.  */
10178 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10181 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10182 if (shift_op != NULL \
10183 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10188 *cost += extra_cost->alu.arith_shift_reg; \
10189 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10190 ASHIFT, 1, speed_p); \
10192 else if (speed_p) \
10193 *cost += extra_cost->alu.arith_shift; \
10195 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10196 ASHIFT, 0, speed_p) \
10197 + rtx_cost (XEXP (x, 1 - IDX), \
10198 GET_MODE (shift_op), \
10199 OP, 1, speed_p)); \
10205 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10206 considering the costs of the addressing mode and memory access
/* X is the MEM rtx; the computed cost is stored in *COST.  SPEED_P
   selects performance versus size costing.  */
10209 arm_mem_costs (rtx x
, const struct cpu_cost_table
*extra_cost
,
10210 int *cost
, bool speed_p
)
10212 machine_mode mode
= GET_MODE (x
);
/* Base cost: every memory access is at least one instruction.  */
10214 *cost
= COSTS_N_INSNS (1);
/* NOTE(review): the opening conjunct of this guard is not visible in
   this extract; per the arm.md reference below it presumably tests
   flag_pic -- confirm against the full source.  */
10217 && GET_CODE (XEXP (x
, 0)) == PLUS
10218 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
10219 /* This will be split into two instructions. Add the cost of the
10220 additional instruction here. The cost of the memory access is computed
10221 below. See arm.md:calculate_pic_address. */
10222 *cost
+= COSTS_N_INSNS (1);
10224 /* Calculate cost of the addressing mode. */
10227 arm_addr_mode_op op_type
;
/* Classify the address form; AMO_DEFAULT for plain register addressing,
   AMO_NO_WB for base-plus-offset forms.  */
10228 switch (GET_CODE (XEXP (x
, 0)))
10232 op_type
= AMO_DEFAULT
;
10235 /* MINUS does not appear in RTL, but the architecture supports it,
10236 so handle this case defensively. */
10239 op_type
= AMO_NO_WB
;
/* Charge the addressing-mode cost from the table matching the mode
   class of the access.  */
10251 if (VECTOR_MODE_P (mode
))
10252 *cost
+= current_tune
->addr_mode_costs
->vector
[op_type
];
10253 else if (FLOAT_MODE_P (mode
))
10254 *cost
+= current_tune
->addr_mode_costs
->fp
[op_type
];
10256 *cost
+= current_tune
->addr_mode_costs
->integer
[op_type
];
10259 /* Calculate cost of memory access. */
/* Float accesses: doubleword (size 8) vs. singleword load cost.  */
10262 if (FLOAT_MODE_P (mode
))
10264 if (GET_MODE_SIZE (mode
) == 8)
10265 *cost
+= extra_cost
->ldst
.loadd
;
10267 *cost
+= extra_cost
->ldst
.loadf
;
10269 else if (VECTOR_MODE_P (mode
))
10270 *cost
+= extra_cost
->ldst
.loadv
;
10273 /* Integer modes */
/* Doubleword integer access costs as LDRD, otherwise a plain load.  */
10274 if (GET_MODE_SIZE (mode
) == 8)
10275 *cost
+= extra_cost
->ldst
.ldrd
;
10277 *cost
+= extra_cost
->ldst
.load
;
10284 /* Helper for arm_bfi_p. */
10286 arm_bfi_1_p (rtx op0
, rtx op1
, rtx
*sub0
, rtx
*sub1
)
10288 unsigned HOST_WIDE_INT const1
;
10289 unsigned HOST_WIDE_INT const2
= 0;
10291 if (!CONST_INT_P (XEXP (op0
, 1)))
10294 const1
= UINTVAL (XEXP (op0
, 1));
10295 if (!CONST_INT_P (XEXP (op1
, 1))
10296 || ~UINTVAL (XEXP (op1
, 1)) != const1
)
10299 if (GET_CODE (XEXP (op0
, 0)) == ASHIFT
10300 && CONST_INT_P (XEXP (XEXP (op0
, 0), 1)))
10302 const2
= UINTVAL (XEXP (XEXP (op0
, 0), 1));
10303 *sub0
= XEXP (XEXP (op0
, 0), 0);
10306 *sub0
= XEXP (op0
, 0);
10308 if (const2
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
10311 *sub1
= XEXP (op1
, 0);
10312 return exact_log2 (const1
+ (HOST_WIDE_INT_1U
<< const2
)) >= 0;
10315 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10316 format looks something like:
10318 (IOR (AND (reg1) (~const1))
10319 (AND (ASHIFT (reg2) (const2))
10322 where const1 is a consecutive sequence of 1-bits with the
10323 least-significant non-zero bit starting at bit position const2. If
10324 const2 is zero, then the shift will not appear at all, due to
10325 canonicalization. The two arms of the IOR expression may be
10328 arm_bfi_p (rtx x
, rtx
*sub0
, rtx
*sub1
)
10330 if (GET_CODE (x
) != IOR
)
10332 if (GET_CODE (XEXP (x
, 0)) != AND
10333 || GET_CODE (XEXP (x
, 1)) != AND
)
10335 return (arm_bfi_1_p (XEXP (x
, 0), XEXP (x
, 1), sub0
, sub1
)
10336 || arm_bfi_1_p (XEXP (x
, 1), XEXP (x
, 0), sub1
, sub0
));
10339 /* RTX costs. Make an estimate of the cost of executing the operation
10340 X, which is contained within an operation with code OUTER_CODE.
10341 SPEED_P indicates whether the cost desired is the performance cost,
10342 or the size cost. The estimate is stored in COST and the return
10343 value is TRUE if the cost calculation is final, or FALSE if the
10344 caller should recurse through the operands of X to add additional
10347 We currently make no attempt to model the size savings of Thumb-2
10348 16-bit instructions. At the normal points in compilation where
10349 this code is called we have no measure of whether the condition
10350 flags are live or not, and thus no realistic way to determine what
10351 the size will eventually be. */
10353 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10354 const struct cpu_cost_table
*extra_cost
,
10355 int *cost
, bool speed_p
)
10357 machine_mode mode
= GET_MODE (x
);
10359 *cost
= COSTS_N_INSNS (1);
10364 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
10366 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
10374 /* SET RTXs don't have a mode so we get it from the destination. */
10375 mode
= GET_MODE (SET_DEST (x
));
10377 if (REG_P (SET_SRC (x
))
10378 && REG_P (SET_DEST (x
)))
10380 /* Assume that most copies can be done with a single insn,
10381 unless we don't have HW FP, in which case everything
10382 larger than word mode will require two insns. */
10383 *cost
= COSTS_N_INSNS (((!TARGET_VFP_BASE
10384 && GET_MODE_SIZE (mode
) > 4)
10387 /* Conditional register moves can be encoded
10388 in 16 bits in Thumb mode. */
10389 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
10395 if (CONST_INT_P (SET_SRC (x
)))
10397 /* Handle CONST_INT here, since the value doesn't have a mode
10398 and we would otherwise be unable to work out the true cost. */
10399 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
10402 /* Slightly lower the cost of setting a core reg to a constant.
10403 This helps break up chains and allows for better scheduling. */
10404 if (REG_P (SET_DEST (x
))
10405 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
10408 /* Immediate moves with an immediate in the range [0, 255] can be
10409 encoded in 16 bits in Thumb mode. */
10410 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
10411 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
10413 goto const_int_cost
;
10419 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
10423 /* Calculations of LDM costs are complex. We assume an initial cost
10424 (ldm_1st) which will load the number of registers mentioned in
10425 ldm_regs_per_insn_1st registers; then each additional
10426 ldm_regs_per_insn_subsequent registers cost one more insn. The
10427 formula for N regs is thus:
10429 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10430 + ldm_regs_per_insn_subsequent - 1)
10431 / ldm_regs_per_insn_subsequent).
10433 Additional costs may also be added for addressing. A similar
10434 formula is used for STM. */
10436 bool is_ldm
= load_multiple_operation (x
, SImode
);
10437 bool is_stm
= store_multiple_operation (x
, SImode
);
10439 if (is_ldm
|| is_stm
)
10443 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
10444 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
10445 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
10446 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
10447 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
10448 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
10449 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
10451 *cost
+= regs_per_insn_1st
10452 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
10453 + regs_per_insn_sub
- 1)
10454 / regs_per_insn_sub
);
10463 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10464 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10465 *cost
+= COSTS_N_INSNS (speed_p
10466 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
10467 else if (mode
== SImode
&& TARGET_IDIV
)
10468 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
10470 *cost
= LIBCALL_COST (2);
10472 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
10473 possible udiv is prefered. */
10474 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
10475 return false; /* All arguments must be in registers. */
10478 /* MOD by a power of 2 can be expanded as:
10480 and r0, r0, #(n - 1)
10481 and r1, r1, #(n - 1)
10482 rsbpl r0, r1, #0. */
10483 if (CONST_INT_P (XEXP (x
, 1))
10484 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
10487 *cost
+= COSTS_N_INSNS (3);
10490 *cost
+= 2 * extra_cost
->alu
.logical
10491 + extra_cost
->alu
.arith
;
10495 /* Fall-through. */
10497 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
10498 possible udiv is prefered. */
10499 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
10500 return false; /* All arguments must be in registers. */
10503 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
10505 *cost
+= (COSTS_N_INSNS (1)
10506 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10508 *cost
+= extra_cost
->alu
.shift_reg
;
10516 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
10518 *cost
+= (COSTS_N_INSNS (2)
10519 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10521 *cost
+= 2 * extra_cost
->alu
.shift
;
10522 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
10523 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
10527 else if (mode
== SImode
)
10529 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10530 /* Slightly disparage register shifts at -Os, but not by much. */
10531 if (!CONST_INT_P (XEXP (x
, 1)))
10532 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10533 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10536 else if (GET_MODE_CLASS (mode
) == MODE_INT
10537 && GET_MODE_SIZE (mode
) < 4)
10539 if (code
== ASHIFT
)
10541 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10542 /* Slightly disparage register shifts at -Os, but not by
10544 if (!CONST_INT_P (XEXP (x
, 1)))
10545 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10546 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10548 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
10550 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
10552 /* Can use SBFX/UBFX. */
10554 *cost
+= extra_cost
->alu
.bfx
;
10555 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10559 *cost
+= COSTS_N_INSNS (1);
10560 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10563 if (CONST_INT_P (XEXP (x
, 1)))
10564 *cost
+= 2 * extra_cost
->alu
.shift
;
10566 *cost
+= (extra_cost
->alu
.shift
10567 + extra_cost
->alu
.shift_reg
);
10570 /* Slightly disparage register shifts. */
10571 *cost
+= !CONST_INT_P (XEXP (x
, 1));
10574 else /* Rotates. */
10576 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
10577 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10580 if (CONST_INT_P (XEXP (x
, 1)))
10581 *cost
+= (2 * extra_cost
->alu
.shift
10582 + extra_cost
->alu
.log_shift
);
10584 *cost
+= (extra_cost
->alu
.shift
10585 + extra_cost
->alu
.shift_reg
10586 + extra_cost
->alu
.log_shift_reg
);
10592 *cost
= LIBCALL_COST (2);
10598 if (mode
== SImode
)
10601 *cost
+= extra_cost
->alu
.rev
;
10608 /* No rev instruction available. Look at arm_legacy_rev
10609 and thumb_legacy_rev for the form of RTL used then. */
10612 *cost
+= COSTS_N_INSNS (9);
10616 *cost
+= 6 * extra_cost
->alu
.shift
;
10617 *cost
+= 3 * extra_cost
->alu
.logical
;
10622 *cost
+= COSTS_N_INSNS (4);
10626 *cost
+= 2 * extra_cost
->alu
.shift
;
10627 *cost
+= extra_cost
->alu
.arith_shift
;
10628 *cost
+= 2 * extra_cost
->alu
.logical
;
10636 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10637 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10639 if (GET_CODE (XEXP (x
, 0)) == MULT
10640 || GET_CODE (XEXP (x
, 1)) == MULT
)
10642 rtx mul_op0
, mul_op1
, sub_op
;
10645 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10647 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10649 mul_op0
= XEXP (XEXP (x
, 0), 0);
10650 mul_op1
= XEXP (XEXP (x
, 0), 1);
10651 sub_op
= XEXP (x
, 1);
10655 mul_op0
= XEXP (XEXP (x
, 1), 0);
10656 mul_op1
= XEXP (XEXP (x
, 1), 1);
10657 sub_op
= XEXP (x
, 0);
10660 /* The first operand of the multiply may be optionally
10662 if (GET_CODE (mul_op0
) == NEG
)
10663 mul_op0
= XEXP (mul_op0
, 0);
10665 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10666 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10667 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
10673 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10677 if (mode
== SImode
)
10679 rtx shift_by_reg
= NULL
;
10682 rtx op0
= XEXP (x
, 0);
10683 rtx op1
= XEXP (x
, 1);
10685 /* Factor out any borrow operation. There's more than one way
10686 of expressing this; try to recognize them all. */
10687 if (GET_CODE (op0
) == MINUS
)
10689 if (arm_borrow_operation (op1
, SImode
))
10691 op1
= XEXP (op0
, 1);
10692 op0
= XEXP (op0
, 0);
10694 else if (arm_borrow_operation (XEXP (op0
, 1), SImode
))
10695 op0
= XEXP (op0
, 0);
10697 else if (GET_CODE (op1
) == PLUS
10698 && arm_borrow_operation (XEXP (op1
, 0), SImode
))
10699 op1
= XEXP (op1
, 0);
10700 else if (GET_CODE (op0
) == NEG
10701 && arm_borrow_operation (op1
, SImode
))
10703 /* Negate with carry-in. For Thumb2 this is done with
10704 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10705 RSC instruction that exists in Arm mode. */
10707 *cost
+= (TARGET_THUMB2
10708 ? extra_cost
->alu
.arith_shift
10709 : extra_cost
->alu
.arith
);
10710 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed_p
);
10713 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10714 Note we do mean ~borrow here. */
10715 else if (TARGET_ARM
&& arm_carry_operation (op0
, SImode
))
10717 *cost
+= rtx_cost (op1
, mode
, code
, 1, speed_p
);
10721 shift_op
= shifter_op_p (op0
, &shift_by_reg
);
10722 if (shift_op
== NULL
)
10724 shift_op
= shifter_op_p (op1
, &shift_by_reg
);
10725 non_shift_op
= op0
;
10728 non_shift_op
= op1
;
10730 if (shift_op
!= NULL
)
10732 if (shift_by_reg
!= NULL
)
10735 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10736 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
10739 *cost
+= extra_cost
->alu
.arith_shift
;
10741 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
10742 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
10746 if (arm_arch_thumb2
10747 && GET_CODE (XEXP (x
, 1)) == MULT
)
10751 *cost
+= extra_cost
->mult
[0].add
;
10752 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
10753 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
10754 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
10758 if (CONST_INT_P (op0
))
10760 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10761 INTVAL (op0
), NULL_RTX
,
10763 *cost
= COSTS_N_INSNS (insns
);
10765 *cost
+= insns
* extra_cost
->alu
.arith
;
10766 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10770 *cost
+= extra_cost
->alu
.arith
;
10772 /* Don't recurse as we don't want to cost any borrow that
10774 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed_p
);
10775 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10779 if (GET_MODE_CLASS (mode
) == MODE_INT
10780 && GET_MODE_SIZE (mode
) < 4)
10782 rtx shift_op
, shift_reg
;
10785 /* We check both sides of the MINUS for shifter operands since,
10786 unlike PLUS, it's not commutative. */
10788 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
10789 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
10791 /* Slightly disparage, as we might need to widen the result. */
10794 *cost
+= extra_cost
->alu
.arith
;
10796 if (CONST_INT_P (XEXP (x
, 0)))
10798 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10805 if (mode
== DImode
)
10807 *cost
+= COSTS_N_INSNS (1);
10809 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
10811 rtx op1
= XEXP (x
, 1);
10814 *cost
+= 2 * extra_cost
->alu
.arith
;
10816 if (GET_CODE (op1
) == ZERO_EXTEND
)
10817 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
10820 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10821 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10825 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10828 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
10829 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
10831 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
10834 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10835 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
10838 *cost
+= (extra_cost
->alu
.arith
10839 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10840 ? extra_cost
->alu
.arith
10841 : extra_cost
->alu
.arith_shift
));
10842 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
10843 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10844 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
10849 *cost
+= 2 * extra_cost
->alu
.arith
;
10855 *cost
= LIBCALL_COST (2);
10859 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10860 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10862 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10864 rtx mul_op0
, mul_op1
, add_op
;
10867 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10869 mul_op0
= XEXP (XEXP (x
, 0), 0);
10870 mul_op1
= XEXP (XEXP (x
, 0), 1);
10871 add_op
= XEXP (x
, 1);
10873 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10874 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10875 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10881 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10884 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10886 *cost
= LIBCALL_COST (2);
10890 /* Narrow modes can be synthesized in SImode, but the range
10891 of useful sub-operations is limited. Check for shift operations
10892 on one of the operands. Only left shifts can be used in the
10894 if (GET_MODE_CLASS (mode
) == MODE_INT
10895 && GET_MODE_SIZE (mode
) < 4)
10897 rtx shift_op
, shift_reg
;
10900 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
10902 if (CONST_INT_P (XEXP (x
, 1)))
10904 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10905 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10907 *cost
= COSTS_N_INSNS (insns
);
10909 *cost
+= insns
* extra_cost
->alu
.arith
;
10910 /* Slightly penalize a narrow operation as the result may
10912 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10916 /* Slightly penalize a narrow operation as the result may
10920 *cost
+= extra_cost
->alu
.arith
;
10925 if (mode
== SImode
)
10927 rtx shift_op
, shift_reg
;
10929 if (TARGET_INT_SIMD
10930 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10931 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10933 /* UXTA[BH] or SXTA[BH]. */
10935 *cost
+= extra_cost
->alu
.extend_arith
;
10936 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10938 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10942 rtx op0
= XEXP (x
, 0);
10943 rtx op1
= XEXP (x
, 1);
10945 /* Handle a side effect of adding in the carry to an addition. */
10946 if (GET_CODE (op0
) == PLUS
10947 && arm_carry_operation (op1
, mode
))
10949 op1
= XEXP (op0
, 1);
10950 op0
= XEXP (op0
, 0);
10952 else if (GET_CODE (op1
) == PLUS
10953 && arm_carry_operation (op0
, mode
))
10955 op0
= XEXP (op1
, 0);
10956 op1
= XEXP (op1
, 1);
10958 else if (GET_CODE (op0
) == PLUS
)
10960 op0
= strip_carry_operation (op0
);
10961 if (swap_commutative_operands_p (op0
, op1
))
10962 std::swap (op0
, op1
);
10965 if (arm_carry_operation (op0
, mode
))
10967 /* Adding the carry to a register is a canonicalization of
10968 adding 0 to the register plus the carry. */
10970 *cost
+= extra_cost
->alu
.arith
;
10971 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
10976 shift_op
= shifter_op_p (op0
, &shift_reg
);
10977 if (shift_op
!= NULL
)
10982 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10983 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10986 *cost
+= extra_cost
->alu
.arith_shift
;
10988 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10989 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10993 if (GET_CODE (op0
) == MULT
)
10997 if (TARGET_DSP_MULTIPLY
10998 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10999 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
11000 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
11001 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
11002 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
11003 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
11004 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
11005 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
11006 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
11007 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
11008 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
11009 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
11012 /* SMLA[BT][BT]. */
11014 *cost
+= extra_cost
->mult
[0].extend_add
;
11015 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
11016 SIGN_EXTEND
, 0, speed_p
)
11017 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
11018 SIGN_EXTEND
, 0, speed_p
)
11019 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
11024 *cost
+= extra_cost
->mult
[0].add
;
11025 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
11026 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
11027 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
11031 if (CONST_INT_P (op1
))
11033 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
11034 INTVAL (op1
), NULL_RTX
,
11036 *cost
= COSTS_N_INSNS (insns
);
11038 *cost
+= insns
* extra_cost
->alu
.arith
;
11039 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
11044 *cost
+= extra_cost
->alu
.arith
;
11046 /* Don't recurse here because we want to test the operands
11047 without any carry operation. */
11048 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
11049 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
11053 if (mode
== DImode
)
11055 if (GET_CODE (XEXP (x
, 0)) == MULT
11056 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
11057 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
11058 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
11059 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
11062 *cost
+= extra_cost
->mult
[1].extend_add
;
11063 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11064 ZERO_EXTEND
, 0, speed_p
)
11065 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
11066 ZERO_EXTEND
, 0, speed_p
)
11067 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
11071 *cost
+= COSTS_N_INSNS (1);
11073 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11074 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
11077 *cost
+= (extra_cost
->alu
.arith
11078 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11079 ? extra_cost
->alu
.arith
11080 : extra_cost
->alu
.arith_shift
));
11082 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
11084 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
11089 *cost
+= 2 * extra_cost
->alu
.arith
;
11094 *cost
= LIBCALL_COST (2);
11099 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
11102 *cost
+= extra_cost
->alu
.rev
;
11106 else if (mode
== SImode
&& arm_arch_thumb2
11107 && arm_bfi_p (x
, &sub0
, &sub1
))
11109 *cost
+= rtx_cost (sub0
, mode
, ZERO_EXTRACT
, 1, speed_p
);
11110 *cost
+= rtx_cost (sub1
, mode
, ZERO_EXTRACT
, 0, speed_p
);
11112 *cost
+= extra_cost
->alu
.bfi
;
11118 /* Fall through. */
11119 case AND
: case XOR
:
11120 if (mode
== SImode
)
11122 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
11123 rtx op0
= XEXP (x
, 0);
11124 rtx shift_op
, shift_reg
;
11128 || (code
== IOR
&& TARGET_THUMB2
)))
11129 op0
= XEXP (op0
, 0);
11132 shift_op
= shifter_op_p (op0
, &shift_reg
);
11133 if (shift_op
!= NULL
)
11138 *cost
+= extra_cost
->alu
.log_shift_reg
;
11139 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11142 *cost
+= extra_cost
->alu
.log_shift
;
11144 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
11145 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11149 if (CONST_INT_P (XEXP (x
, 1)))
11151 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
11152 INTVAL (XEXP (x
, 1)), NULL_RTX
,
11155 *cost
= COSTS_N_INSNS (insns
);
11157 *cost
+= insns
* extra_cost
->alu
.logical
;
11158 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
11163 *cost
+= extra_cost
->alu
.logical
;
11164 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
11165 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11169 if (mode
== DImode
)
11171 rtx op0
= XEXP (x
, 0);
11172 enum rtx_code subcode
= GET_CODE (op0
);
11174 *cost
+= COSTS_N_INSNS (1);
11178 || (code
== IOR
&& TARGET_THUMB2
)))
11179 op0
= XEXP (op0
, 0);
11181 if (GET_CODE (op0
) == ZERO_EXTEND
)
11184 *cost
+= 2 * extra_cost
->alu
.logical
;
11186 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
11188 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11191 else if (GET_CODE (op0
) == SIGN_EXTEND
)
11194 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
11196 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
11198 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11203 *cost
+= 2 * extra_cost
->alu
.logical
;
11209 *cost
= LIBCALL_COST (2);
11213 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11214 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11216 rtx op0
= XEXP (x
, 0);
11218 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
11219 op0
= XEXP (op0
, 0);
11222 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
11224 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
11225 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
11228 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11230 *cost
= LIBCALL_COST (2);
11234 if (mode
== SImode
)
11236 if (TARGET_DSP_MULTIPLY
11237 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11238 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11239 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11240 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11241 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
11242 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11243 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11244 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
11245 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11246 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11247 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11248 && (INTVAL (XEXP (XEXP (x
, 1), 1))
11251 /* SMUL[TB][TB]. */
11253 *cost
+= extra_cost
->mult
[0].extend
;
11254 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
11255 SIGN_EXTEND
, 0, speed_p
);
11256 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
11257 SIGN_EXTEND
, 1, speed_p
);
11261 *cost
+= extra_cost
->mult
[0].simple
;
11265 if (mode
== DImode
)
11267 if ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11268 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
11269 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11270 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
))
11273 *cost
+= extra_cost
->mult
[1].extend
;
11274 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
11275 ZERO_EXTEND
, 0, speed_p
)
11276 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
11277 ZERO_EXTEND
, 0, speed_p
));
11281 *cost
= LIBCALL_COST (2);
11286 *cost
= LIBCALL_COST (2);
11290 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11291 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11293 if (GET_CODE (XEXP (x
, 0)) == MULT
)
11296 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
11301 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11305 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11307 *cost
= LIBCALL_COST (1);
11311 if (mode
== SImode
)
11313 if (GET_CODE (XEXP (x
, 0)) == ABS
)
11315 *cost
+= COSTS_N_INSNS (1);
11316 /* Assume the non-flag-changing variant. */
11318 *cost
+= (extra_cost
->alu
.log_shift
11319 + extra_cost
->alu
.arith_shift
);
11320 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
11324 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
11325 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
11327 *cost
+= COSTS_N_INSNS (1);
11328 /* No extra cost for MOV imm and MVN imm. */
11329 /* If the comparison op is using the flags, there's no further
11330 cost, otherwise we need to add the cost of the comparison. */
11331 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
11332 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
11333 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
11335 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
11336 *cost
+= (COSTS_N_INSNS (1)
11337 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
11339 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
11342 *cost
+= extra_cost
->alu
.arith
;
11348 *cost
+= extra_cost
->alu
.arith
;
11352 if (GET_MODE_CLASS (mode
) == MODE_INT
11353 && GET_MODE_SIZE (mode
) < 4)
11355 /* Slightly disparage, as we might need an extend operation. */
11358 *cost
+= extra_cost
->alu
.arith
;
11362 if (mode
== DImode
)
11364 *cost
+= COSTS_N_INSNS (1);
11366 *cost
+= 2 * extra_cost
->alu
.arith
;
11371 *cost
= LIBCALL_COST (1);
11375 if (mode
== SImode
)
11378 rtx shift_reg
= NULL
;
11380 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11384 if (shift_reg
!= NULL
)
11387 *cost
+= extra_cost
->alu
.log_shift_reg
;
11388 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11391 *cost
+= extra_cost
->alu
.log_shift
;
11392 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
11397 *cost
+= extra_cost
->alu
.logical
;
11400 if (mode
== DImode
)
11402 *cost
+= COSTS_N_INSNS (1);
11408 *cost
+= LIBCALL_COST (1);
11413 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
11415 *cost
+= COSTS_N_INSNS (3);
11418 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
11419 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
11421 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
11422 /* Assume that if one arm of the if_then_else is a register,
11423 that it will be tied with the result and eliminate the
11424 conditional insn. */
11425 if (REG_P (XEXP (x
, 1)))
11427 else if (REG_P (XEXP (x
, 2)))
11433 if (extra_cost
->alu
.non_exec_costs_exec
)
11434 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
11436 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
11439 *cost
+= op1cost
+ op2cost
;
11445 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
11449 machine_mode op0mode
;
11450 /* We'll mostly assume that the cost of a compare is the cost of the
11451 LHS. However, there are some notable exceptions. */
11453 /* Floating point compares are never done as side-effects. */
11454 op0mode
= GET_MODE (XEXP (x
, 0));
11455 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
11456 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11459 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
11461 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
11463 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
11469 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
11471 *cost
= LIBCALL_COST (2);
11475 /* DImode compares normally take two insns. */
11476 if (op0mode
== DImode
)
11478 *cost
+= COSTS_N_INSNS (1);
11480 *cost
+= 2 * extra_cost
->alu
.arith
;
11484 if (op0mode
== SImode
)
11489 if (XEXP (x
, 1) == const0_rtx
11490 && !(REG_P (XEXP (x
, 0))
11491 || (GET_CODE (XEXP (x
, 0)) == SUBREG
11492 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
11494 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11496 /* Multiply operations that set the flags are often
11497 significantly more expensive. */
11499 && GET_CODE (XEXP (x
, 0)) == MULT
11500 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
11501 *cost
+= extra_cost
->mult
[0].flag_setting
;
11504 && GET_CODE (XEXP (x
, 0)) == PLUS
11505 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11506 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
11508 *cost
+= extra_cost
->mult
[0].flag_setting
;
11513 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11514 if (shift_op
!= NULL
)
11516 if (shift_reg
!= NULL
)
11518 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
11521 *cost
+= extra_cost
->alu
.arith_shift_reg
;
11524 *cost
+= extra_cost
->alu
.arith_shift
;
11525 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
11526 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
11531 *cost
+= extra_cost
->alu
.arith
;
11532 if (CONST_INT_P (XEXP (x
, 1))
11533 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11535 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11543 *cost
= LIBCALL_COST (2);
11553 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11556 && TARGET_HARD_FLOAT
11557 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
11558 && (XEXP (x
, 1) == CONST0_RTX (mode
)))
11564 /* Fall through. */
11578 if (outer_code
== SET
)
11580 /* Is it a store-flag operation? */
11581 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11582 && XEXP (x
, 1) == const0_rtx
)
11584 /* Thumb also needs an IT insn. */
11585 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
11588 if (XEXP (x
, 1) == const0_rtx
)
11593 /* LSR Rd, Rn, #31. */
11595 *cost
+= extra_cost
->alu
.shift
;
11605 *cost
+= COSTS_N_INSNS (1);
11609 /* RSBS T1, Rn, Rn, LSR #31
11611 *cost
+= COSTS_N_INSNS (1);
11613 *cost
+= extra_cost
->alu
.arith_shift
;
11617 /* RSB Rd, Rn, Rn, ASR #1
11618 LSR Rd, Rd, #31. */
11619 *cost
+= COSTS_N_INSNS (1);
11621 *cost
+= (extra_cost
->alu
.arith_shift
11622 + extra_cost
->alu
.shift
);
11628 *cost
+= COSTS_N_INSNS (1);
11630 *cost
+= extra_cost
->alu
.shift
;
11634 /* Remaining cases are either meaningless or would take
11635 three insns anyway. */
11636 *cost
= COSTS_N_INSNS (3);
11639 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11644 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
11645 if (CONST_INT_P (XEXP (x
, 1))
11646 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11648 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11655 /* Not directly inside a set. If it involves the condition code
11656 register it must be the condition for a branch, cond_exec or
11657 I_T_E operation. Since the comparison is performed elsewhere
11658 this is just the control part which has no additional
11660 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11661 && XEXP (x
, 1) == const0_rtx
)
11669 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11670 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11673 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11677 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11679 *cost
= LIBCALL_COST (1);
11683 if (mode
== SImode
)
11686 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
11690 *cost
= LIBCALL_COST (1);
11694 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
11695 && MEM_P (XEXP (x
, 0)))
11697 if (mode
== DImode
)
11698 *cost
+= COSTS_N_INSNS (1);
11703 if (GET_MODE (XEXP (x
, 0)) == SImode
)
11704 *cost
+= extra_cost
->ldst
.load
;
11706 *cost
+= extra_cost
->ldst
.load_sign_extend
;
11708 if (mode
== DImode
)
11709 *cost
+= extra_cost
->alu
.shift
;
11714 /* Widening from less than 32-bits requires an extend operation. */
11715 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11717 /* We have SXTB/SXTH. */
11718 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11720 *cost
+= extra_cost
->alu
.extend
;
11722 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11724 /* Needs two shifts. */
11725 *cost
+= COSTS_N_INSNS (1);
11726 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11728 *cost
+= 2 * extra_cost
->alu
.shift
;
11731 /* Widening beyond 32-bits requires one more insn. */
11732 if (mode
== DImode
)
11734 *cost
+= COSTS_N_INSNS (1);
11736 *cost
+= extra_cost
->alu
.shift
;
11743 || GET_MODE (XEXP (x
, 0)) == SImode
11744 || GET_MODE (XEXP (x
, 0)) == QImode
)
11745 && MEM_P (XEXP (x
, 0)))
11747 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11749 if (mode
== DImode
)
11750 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11755 /* Widening from less than 32-bits requires an extend operation. */
11756 if (GET_MODE (XEXP (x
, 0)) == QImode
)
11758 /* UXTB can be a shorter instruction in Thumb2, but it might
11759 be slower than the AND Rd, Rn, #255 alternative. When
11760 optimizing for speed it should never be slower to use
11761 AND, and we don't really model 16-bit vs 32-bit insns
11764 *cost
+= extra_cost
->alu
.logical
;
11766 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11768 /* We have UXTB/UXTH. */
11769 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11771 *cost
+= extra_cost
->alu
.extend
;
11773 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11775 /* Needs two shifts. It's marginally preferable to use
11776 shifts rather than two BIC instructions as the second
11777 shift may merge with a subsequent insn as a shifter
11779 *cost
= COSTS_N_INSNS (2);
11780 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11782 *cost
+= 2 * extra_cost
->alu
.shift
;
11785 /* Widening beyond 32-bits requires one more insn. */
11786 if (mode
== DImode
)
11788 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11795 /* CONST_INT has no mode, so we cannot tell for sure how many
11796 insns are really going to be needed. The best we can do is
11797 look at the value passed. If it fits in SImode, then assume
11798 that's the mode it will be used for. Otherwise assume it
11799 will be used in DImode. */
11800 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
11805 /* Avoid blowing up in arm_gen_constant (). */
11806 if (!(outer_code
== PLUS
11807 || outer_code
== AND
11808 || outer_code
== IOR
11809 || outer_code
== XOR
11810 || outer_code
== MINUS
))
11814 if (mode
== SImode
)
11816 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
11817 INTVAL (x
), NULL
, NULL
,
11823 *cost
+= COSTS_N_INSNS (arm_gen_constant
11824 (outer_code
, SImode
, NULL
,
11825 trunc_int_for_mode (INTVAL (x
), SImode
),
11827 + arm_gen_constant (outer_code
, SImode
, NULL
,
11828 INTVAL (x
) >> 32, NULL
,
11840 if (arm_arch_thumb2
&& !flag_pic
)
11841 *cost
+= COSTS_N_INSNS (1);
11843 *cost
+= extra_cost
->ldst
.load
;
11846 *cost
+= COSTS_N_INSNS (1);
11850 *cost
+= COSTS_N_INSNS (1);
11852 *cost
+= extra_cost
->alu
.arith
;
11858 *cost
= COSTS_N_INSNS (4);
11863 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11864 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11866 if (vfp3_const_double_rtx (x
))
11869 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
11875 if (mode
== DFmode
)
11876 *cost
+= extra_cost
->ldst
.loadd
;
11878 *cost
+= extra_cost
->ldst
.loadf
;
11881 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
11885 *cost
= COSTS_N_INSNS (4);
11890 if (((TARGET_NEON
&& TARGET_HARD_FLOAT
11891 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
11892 || TARGET_HAVE_MVE
)
11893 && simd_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
11894 *cost
= COSTS_N_INSNS (1);
11896 *cost
= COSTS_N_INSNS (4);
11901 /* When optimizing for size, we prefer constant pool entries to
11902 MOVW/MOVT pairs, so bump the cost of these slightly. */
11909 *cost
+= extra_cost
->alu
.clz
;
11913 if (XEXP (x
, 1) == const0_rtx
)
11916 *cost
+= extra_cost
->alu
.log_shift
;
11917 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11920 /* Fall through. */
11924 *cost
+= COSTS_N_INSNS (1);
11928 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11929 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11930 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
11931 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11932 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
11933 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
11934 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
11935 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11939 *cost
+= extra_cost
->mult
[1].extend
;
11940 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
11941 ZERO_EXTEND
, 0, speed_p
)
11942 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
11943 ZERO_EXTEND
, 0, speed_p
));
11946 *cost
= LIBCALL_COST (1);
11949 case UNSPEC_VOLATILE
:
11951 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
11954 /* Reading the PC is like reading any other register. Writing it
11955 is more expensive, but we take that into account elsewhere. */
11960 /* TODO: Simple zero_extract of bottom bits using AND. */
11961 /* Fall through. */
11965 && CONST_INT_P (XEXP (x
, 1))
11966 && CONST_INT_P (XEXP (x
, 2)))
11969 *cost
+= extra_cost
->alu
.bfx
;
11970 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11973 /* Without UBFX/SBFX, need to resort to shift operations. */
11974 *cost
+= COSTS_N_INSNS (1);
11976 *cost
+= 2 * extra_cost
->alu
.shift
;
11977 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
11981 if (TARGET_HARD_FLOAT
)
11984 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
11986 && GET_MODE (XEXP (x
, 0)) == HFmode
)
11988 /* Pre v8, widening HF->DF is a two-step process, first
11989 widening to SFmode. */
11990 *cost
+= COSTS_N_INSNS (1);
11992 *cost
+= extra_cost
->fp
[0].widen
;
11994 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11998 *cost
= LIBCALL_COST (1);
12001 case FLOAT_TRUNCATE
:
12002 if (TARGET_HARD_FLOAT
)
12005 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
12006 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
12008 /* Vector modes? */
12010 *cost
= LIBCALL_COST (1);
12014 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
12016 rtx op0
= XEXP (x
, 0);
12017 rtx op1
= XEXP (x
, 1);
12018 rtx op2
= XEXP (x
, 2);
12021 /* vfms or vfnma. */
12022 if (GET_CODE (op0
) == NEG
)
12023 op0
= XEXP (op0
, 0);
12025 /* vfnms or vfnma. */
12026 if (GET_CODE (op2
) == NEG
)
12027 op2
= XEXP (op2
, 0);
12029 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
12030 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
12031 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
12034 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
12039 *cost
= LIBCALL_COST (3);
12044 if (TARGET_HARD_FLOAT
)
12046 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12047 a vcvt fixed-point conversion. */
12048 if (code
== FIX
&& mode
== SImode
12049 && GET_CODE (XEXP (x
, 0)) == FIX
12050 && GET_MODE (XEXP (x
, 0)) == SFmode
12051 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
12052 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
12056 *cost
+= extra_cost
->fp
[0].toint
;
12058 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
12063 if (GET_MODE_CLASS (mode
) == MODE_INT
)
12065 mode
= GET_MODE (XEXP (x
, 0));
12067 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
12068 /* Strip of the 'cost' of rounding towards zero. */
12069 if (GET_CODE (XEXP (x
, 0)) == FIX
)
12070 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
12073 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
12074 /* ??? Increase the cost to deal with transferring from
12075 FP -> CORE registers? */
12078 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
12082 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
12085 /* Vector costs? */
12087 *cost
= LIBCALL_COST (1);
12091 case UNSIGNED_FLOAT
:
12092 if (TARGET_HARD_FLOAT
)
12094 /* ??? Increase the cost to deal with transferring from CORE
12095 -> FP registers? */
12097 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
12100 *cost
= LIBCALL_COST (1);
12108 /* Just a guess. Guess number of instructions in the asm
12109 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12110 though (see PR60663). */
12111 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
12112 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
12114 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
12118 if (mode
!= VOIDmode
)
12119 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
12121 *cost
= COSTS_N_INSNS (4); /* Who knows? */
12126 #undef HANDLE_NARROW_SHIFT_ARITH
12128 /* RTX costs entry point. */
12131 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
12132 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
12135 int code
= GET_CODE (x
);
12136 gcc_assert (current_tune
->insn_extra_cost
);
12138 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
12139 (enum rtx_code
) outer_code
,
12140 current_tune
->insn_extra_cost
,
12143 if (dump_file
&& arm_verbose_cost
)
12145 print_rtl_single (dump_file
, x
);
12146 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
12147 *total
, result
? "final" : "partial");
12153 arm_insn_cost (rtx_insn
*insn
, bool speed
)
12157 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12158 will likely disappear during register allocation. */
12159 if (!reload_completed
12160 && GET_CODE (PATTERN (insn
)) == SET
12161 && REG_P (SET_DEST (PATTERN (insn
)))
12162 && REG_P (SET_SRC (PATTERN (insn
))))
12164 cost
= pattern_cost (PATTERN (insn
), speed
);
12165 /* If the cost is zero, then it's likely a complex insn. We don't want the
12166 cost of these to be less than something we know about. */
12167 return cost
? cost
: COSTS_N_INSNS (2);
12170 /* All address computations that can be done are free, but rtx cost returns
12171 the same for practically all of them. So we weight the different types
12172 of address here in the order (most pref first):
12173 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12175 arm_arm_address_cost (rtx x
)
12177 enum rtx_code c
= GET_CODE (x
);
12179 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
12181 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
12186 if (CONST_INT_P (XEXP (x
, 1)))
12189 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
12199 arm_thumb_address_cost (rtx x
)
12201 enum rtx_code c
= GET_CODE (x
);
12206 && REG_P (XEXP (x
, 0))
12207 && CONST_INT_P (XEXP (x
, 1)))
12214 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
12215 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
12217 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
12220 /* Adjust cost hook for XScale. */
12222 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12225 /* Some true dependencies can have a higher cost depending
12226 on precisely how certain input operands are used. */
12228 && recog_memoized (insn
) >= 0
12229 && recog_memoized (dep
) >= 0)
12231 int shift_opnum
= get_attr_shift (insn
);
12232 enum attr_type attr_type
= get_attr_type (dep
);
12234 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12235 operand for INSN. If we have a shifted input operand and the
12236 instruction we depend on is another ALU instruction, then we may
12237 have to account for an additional stall. */
12238 if (shift_opnum
!= 0
12239 && (attr_type
== TYPE_ALU_SHIFT_IMM_LSL_1TO4
12240 || attr_type
== TYPE_ALU_SHIFT_IMM_OTHER
12241 || attr_type
== TYPE_ALUS_SHIFT_IMM
12242 || attr_type
== TYPE_LOGIC_SHIFT_IMM
12243 || attr_type
== TYPE_LOGICS_SHIFT_IMM
12244 || attr_type
== TYPE_ALU_SHIFT_REG
12245 || attr_type
== TYPE_ALUS_SHIFT_REG
12246 || attr_type
== TYPE_LOGIC_SHIFT_REG
12247 || attr_type
== TYPE_LOGICS_SHIFT_REG
12248 || attr_type
== TYPE_MOV_SHIFT
12249 || attr_type
== TYPE_MVN_SHIFT
12250 || attr_type
== TYPE_MOV_SHIFT_REG
12251 || attr_type
== TYPE_MVN_SHIFT_REG
))
12253 rtx shifted_operand
;
12256 /* Get the shifted operand. */
12257 extract_insn (insn
);
12258 shifted_operand
= recog_data
.operand
[shift_opnum
];
12260 /* Iterate over all the operands in DEP. If we write an operand
12261 that overlaps with SHIFTED_OPERAND, then we have increase the
12262 cost of this dependency. */
12263 extract_insn (dep
);
12264 preprocess_constraints (dep
);
12265 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
12267 /* We can ignore strict inputs. */
12268 if (recog_data
.operand_type
[opno
] == OP_IN
)
12271 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
12283 /* Adjust cost hook for Cortex A9. */
12285 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12295 case REG_DEP_OUTPUT
:
12296 if (recog_memoized (insn
) >= 0
12297 && recog_memoized (dep
) >= 0)
12299 if (GET_CODE (PATTERN (insn
)) == SET
)
12302 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
12304 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
12306 enum attr_type attr_type_insn
= get_attr_type (insn
);
12307 enum attr_type attr_type_dep
= get_attr_type (dep
);
12309 /* By default all dependencies of the form
12312 have an extra latency of 1 cycle because
12313 of the input and output dependency in this
12314 case. However this gets modeled as an true
12315 dependency and hence all these checks. */
12316 if (REG_P (SET_DEST (PATTERN (insn
)))
12317 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
12319 /* FMACS is a special case where the dependent
12320 instruction can be issued 3 cycles before
12321 the normal latency in case of an output
12323 if ((attr_type_insn
== TYPE_FMACS
12324 || attr_type_insn
== TYPE_FMACD
)
12325 && (attr_type_dep
== TYPE_FMACS
12326 || attr_type_dep
== TYPE_FMACD
))
12328 if (dep_type
== REG_DEP_OUTPUT
)
12329 *cost
= insn_default_latency (dep
) - 3;
12331 *cost
= insn_default_latency (dep
);
12336 if (dep_type
== REG_DEP_OUTPUT
)
12337 *cost
= insn_default_latency (dep
) + 1;
12339 *cost
= insn_default_latency (dep
);
12349 gcc_unreachable ();
12355 /* Adjust cost hook for FA726TE. */
12357 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12360 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
12361 have penalty of 3. */
12362 if (dep_type
== REG_DEP_TRUE
12363 && recog_memoized (insn
) >= 0
12364 && recog_memoized (dep
) >= 0
12365 && get_attr_conds (dep
) == CONDS_SET
)
12367 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12368 if (get_attr_conds (insn
) == CONDS_USE
12369 && get_attr_type (insn
) != TYPE_BRANCH
)
12375 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
12376 || get_attr_conds (insn
) == CONDS_USE
)
12386 /* Implement TARGET_REGISTER_MOVE_COST.
12388 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12389 it is typically more expensive than a single memory access. We set
12390 the cost to less than two memory accesses so that floating
12391 point to integer conversion does not go through memory. */
12394 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
12395 reg_class_t from
, reg_class_t to
)
12399 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
12400 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
12402 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
12403 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
12405 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
12412 if (from
== HI_REGS
|| to
== HI_REGS
)
12419 /* Implement TARGET_MEMORY_MOVE_COST. */
12422 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
12423 bool in ATTRIBUTE_UNUSED
)
12429 if (GET_MODE_SIZE (mode
) < 4)
12432 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
12436 /* Vectorizer cost model implementation. */
12438 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12440 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
12442 int misalign ATTRIBUTE_UNUSED
)
12446 switch (type_of_cost
)
12449 return current_tune
->vec_costs
->scalar_stmt_cost
;
12452 return current_tune
->vec_costs
->scalar_load_cost
;
12455 return current_tune
->vec_costs
->scalar_store_cost
;
12458 return current_tune
->vec_costs
->vec_stmt_cost
;
12461 return current_tune
->vec_costs
->vec_align_load_cost
;
12464 return current_tune
->vec_costs
->vec_store_cost
;
12466 case vec_to_scalar
:
12467 return current_tune
->vec_costs
->vec_to_scalar_cost
;
12469 case scalar_to_vec
:
12470 return current_tune
->vec_costs
->scalar_to_vec_cost
;
12472 case unaligned_load
:
12473 case vector_gather_load
:
12474 return current_tune
->vec_costs
->vec_unalign_load_cost
;
12476 case unaligned_store
:
12477 case vector_scatter_store
:
12478 return current_tune
->vec_costs
->vec_unalign_store_cost
;
12480 case cond_branch_taken
:
12481 return current_tune
->vec_costs
->cond_taken_branch_cost
;
12483 case cond_branch_not_taken
:
12484 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
12487 case vec_promote_demote
:
12488 return current_tune
->vec_costs
->vec_stmt_cost
;
12490 case vec_construct
:
12491 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
12492 return elements
/ 2 + 1;
12495 gcc_unreachable ();
12499 /* Return true if and only if this insn can dual-issue only as older. */
12501 cortexa7_older_only (rtx_insn
*insn
)
12503 if (recog_memoized (insn
) < 0)
12506 switch (get_attr_type (insn
))
12508 case TYPE_ALU_DSP_REG
:
12509 case TYPE_ALU_SREG
:
12510 case TYPE_ALUS_SREG
:
12511 case TYPE_LOGIC_REG
:
12512 case TYPE_LOGICS_REG
:
12514 case TYPE_ADCS_REG
:
12519 case TYPE_SHIFT_IMM
:
12520 case TYPE_SHIFT_REG
:
12521 case TYPE_LOAD_BYTE
:
12524 case TYPE_FFARITHS
:
12526 case TYPE_FFARITHD
:
12544 case TYPE_F_STORES
:
12551 /* Return true if and only if this insn can dual-issue as younger. */
12553 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
12555 if (recog_memoized (insn
) < 0)
12558 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
12562 switch (get_attr_type (insn
))
12565 case TYPE_ALUS_IMM
:
12566 case TYPE_LOGIC_IMM
:
12567 case TYPE_LOGICS_IMM
:
12572 case TYPE_MOV_SHIFT
:
12573 case TYPE_MOV_SHIFT_REG
:
12583 /* Look for an instruction that can dual issue only as an older
12584 instruction, and move it in front of any instructions that can
12585 dual-issue as younger, while preserving the relative order of all
12586 other instructions in the ready list. This is a hueuristic to help
12587 dual-issue in later cycles, by postponing issue of more flexible
12588 instructions. This heuristic may affect dual issue opportunities
12589 in the current cycle. */
12591 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
12592 int *n_readyp
, int clock
)
12595 int first_older_only
= -1, first_younger
= -1;
12599 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12603 /* Traverse the ready list from the head (the instruction to issue
12604 first), and looking for the first instruction that can issue as
12605 younger and the first instruction that can dual-issue only as
12607 for (i
= *n_readyp
- 1; i
>= 0; i
--)
12609 rtx_insn
*insn
= ready
[i
];
12610 if (cortexa7_older_only (insn
))
12612 first_older_only
= i
;
12614 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
12617 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
12621 /* Nothing to reorder because either no younger insn found or insn
12622 that can dual-issue only as older appears before any insn that
12623 can dual-issue as younger. */
12624 if (first_younger
== -1)
12627 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
12631 /* Nothing to reorder because no older-only insn in the ready list. */
12632 if (first_older_only
== -1)
12635 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
12639 /* Move first_older_only insn before first_younger. */
12641 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
12642 INSN_UID(ready
[first_older_only
]),
12643 INSN_UID(ready
[first_younger
]));
12644 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
12645 for (i
= first_older_only
; i
< first_younger
; i
++)
12647 ready
[i
] = ready
[i
+1];
12650 ready
[i
] = first_older_only_insn
;
12654 /* Implement TARGET_SCHED_REORDER. */
12656 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
12661 case TARGET_CPU_cortexa7
:
12662 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
12665 /* Do nothing for other cores. */
12669 return arm_issue_rate ();
12672 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12673 It corrects the value of COST based on the relationship between
12674 INSN and DEP through the dependence LINK. It returns the new
12675 value. There is a per-core adjust_cost hook to adjust scheduler costs
12676 and the per-core hook can choose to completely override the generic
12677 adjust_cost function. Only put bits of code into arm_adjust_cost that
12678 are common across all cores. */
12680 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
12685 /* When generating Thumb-1 code, we want to place flag-setting operations
12686 close to a conditional branch which depends on them, so that we can
12687 omit the comparison. */
12690 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12691 && recog_memoized (dep
) >= 0
12692 && get_attr_conds (dep
) == CONDS_SET
)
12695 if (current_tune
->sched_adjust_cost
!= NULL
)
12697 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
12701 /* XXX Is this strictly true? */
12702 if (dep_type
== REG_DEP_ANTI
12703 || dep_type
== REG_DEP_OUTPUT
)
12706 /* Call insns don't incur a stall, even if they follow a load. */
12711 if ((i_pat
= single_set (insn
)) != NULL
12712 && MEM_P (SET_SRC (i_pat
))
12713 && (d_pat
= single_set (dep
)) != NULL
12714 && MEM_P (SET_DEST (d_pat
)))
12716 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12717 /* This is a load after a store, there is no conflict if the load reads
12718 from a cached area. Assume that loads from the stack, and from the
12719 constant pool are cached, and that others will miss. This is a
12722 if ((SYMBOL_REF_P (src_mem
)
12723 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12724 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12725 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12726 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12734 arm_max_conditional_execute (void)
12736 return max_insns_skipped
;
12740 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12743 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12745 return (optimize
> 0) ? 2 : 0;
12749 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12751 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12754 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12755 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12756 sequences of non-executed instructions in IT blocks probably take the same
12757 amount of time as executed instructions (and the IT instruction itself takes
12758 space in icache). This function was experimentally determined to give good
12759 results on a popular embedded benchmark. */
12762 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12764 return (TARGET_32BIT
&& speed_p
) ? 1
12765 : arm_default_branch_cost (speed_p
, predictable_p
);
12769 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12771 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12774 static bool fp_consts_inited
= false;
12776 static REAL_VALUE_TYPE value_fp0
;
12779 init_fp_table (void)
12783 r
= REAL_VALUE_ATOF ("0", DFmode
);
12785 fp_consts_inited
= true;
12788 /* Return TRUE if rtx X is a valid immediate FP constant. */
12790 arm_const_double_rtx (rtx x
)
12792 const REAL_VALUE_TYPE
*r
;
12794 if (!fp_consts_inited
)
12797 r
= CONST_DOUBLE_REAL_VALUE (x
);
12798 if (REAL_VALUE_MINUS_ZERO (*r
))
12801 if (real_equal (r
, &value_fp0
))
12807 /* VFPv3 has a fairly wide range of representable immediates, formed from
12808 "quarter-precision" floating-point values. These can be evaluated using this
12809 formula (with ^ for exponentiation):
12813 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12814 16 <= n <= 31 and 0 <= r <= 7.
12816 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12818 - A (most-significant) is the sign bit.
12819 - BCD are the exponent (encoded as r XOR 3).
12820 - EFGH are the mantissa (encoded as n - 16).
12823 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12824 fconst[sd] instruction, or -1 if X isn't suitable. */
12826 vfp3_const_double_index (rtx x
)
12828 REAL_VALUE_TYPE r
, m
;
12829 int sign
, exponent
;
12830 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12831 unsigned HOST_WIDE_INT mask
;
12832 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12835 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12838 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12840 /* We can't represent these things, so detect them first. */
12841 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12844 /* Extract sign, exponent and mantissa. */
12845 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12846 r
= real_value_abs (&r
);
12847 exponent
= REAL_EXP (&r
);
12848 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12849 highest (sign) bit, with a fixed binary point at bit point_pos.
12850 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12851 bits for the mantissa, this may fail (low bits would be lost). */
12852 real_ldexp (&m
, &r
, point_pos
- exponent
);
12853 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12854 mantissa
= w
.elt (0);
12855 mant_hi
= w
.elt (1);
12857 /* If there are bits set in the low part of the mantissa, we can't
12858 represent this value. */
12862 /* Now make it so that mantissa contains the most-significant bits, and move
12863 the point_pos to indicate that the least-significant bits have been
12865 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12866 mantissa
= mant_hi
;
12868 /* We can permit four significant bits of mantissa only, plus a high bit
12869 which is always 1. */
12870 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12871 if ((mantissa
& mask
) != 0)
12874 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12875 mantissa
>>= point_pos
- 5;
12877 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12878 floating-point immediate zero with Neon using an integer-zero load, but
12879 that case is handled elsewhere.) */
12883 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12885 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12886 normalized significands are in the range [1, 2). (Our mantissa is shifted
12887 left 4 places at this point relative to normalized IEEE754 values). GCC
12888 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12889 REAL_EXP must be altered. */
12890 exponent
= 5 - exponent
;
12892 if (exponent
< 0 || exponent
> 7)
12895 /* Sign, mantissa and exponent are now in the correct form to plug into the
12896 formula described in the comment above. */
12897 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12900 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12902 vfp3_const_double_rtx (rtx x
)
12907 return vfp3_const_double_index (x
) != -1;
12910 /* Recognize immediates which can be used in various Neon and MVE instructions.
12911 Legal immediates are described by the following table (for VMVN variants, the
12912 bitwise inverse of the constant shown is recognized. In either case, VMOV
12913 is output and the correct instruction to use for a given constant is chosen
12914 by the assembler). The constant shown is replicated across all elements of
12915 the destination vector.
12917 insn elems variant constant (binary)
12918 ---- ----- ------- -----------------
12919 vmov i32 0 00000000 00000000 00000000 abcdefgh
12920 vmov i32 1 00000000 00000000 abcdefgh 00000000
12921 vmov i32 2 00000000 abcdefgh 00000000 00000000
12922 vmov i32 3 abcdefgh 00000000 00000000 00000000
12923 vmov i16 4 00000000 abcdefgh
12924 vmov i16 5 abcdefgh 00000000
12925 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12926 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12927 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12928 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12929 vmvn i16 10 00000000 abcdefgh
12930 vmvn i16 11 abcdefgh 00000000
12931 vmov i32 12 00000000 00000000 abcdefgh 11111111
12932 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12933 vmov i32 14 00000000 abcdefgh 11111111 11111111
12934 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12935 vmov i8 16 abcdefgh
12936 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12937 eeeeeeee ffffffff gggggggg hhhhhhhh
12938 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12939 vmov f32 19 00000000 00000000 00000000 00000000
12941 For case 18, B = !b. Representable values are exactly those accepted by
12942 vfp3_const_double_index, but are output as floating-point numbers rather
12945 For case 19, we will change it to vmov.i32 when assembling.
12947 Variants 0-5 (inclusive) may also be used as immediates for the second
12948 operand of VORR/VBIC instructions.
12950 The INVERSE argument causes the bitwise inverse of the given operand to be
12951 recognized instead (used for recognizing legal immediates for the VAND/VORN
12952 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12953 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12954 output, rather than the real insns vbic/vorr).
12956 INVERSE makes no difference to the recognition of float vectors.
12958 The return value is the variant of immediate as shown in the above table, or
12959 -1 if the given value doesn't match any of the listed patterns.
12962 simd_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12963 rtx
*modconst
, int *elementwidth
)
12965 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12967 for (i = 0; i < idx; i += (STRIDE)) \
12972 immtype = (CLASS); \
12973 elsize = (ELSIZE); \
12977 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12978 unsigned int innersize
;
12979 unsigned char bytes
[16] = {};
12980 int immtype
= -1, matches
;
12981 unsigned int invmask
= inverse
? 0xff : 0;
12982 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12985 n_elts
= CONST_VECTOR_NUNITS (op
);
12989 gcc_assert (mode
!= VOIDmode
);
12992 innersize
= GET_MODE_UNIT_SIZE (mode
);
12994 /* Only support 128-bit vectors for MVE. */
12995 if (TARGET_HAVE_MVE
12997 || VALID_MVE_PRED_MODE (mode
)
12998 || n_elts
* innersize
!= 16))
13001 if (!TARGET_HAVE_MVE
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
)
13004 /* Vectors of float constants. */
13005 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
13007 rtx el0
= CONST_VECTOR_ELT (op
, 0);
13009 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
13012 /* FP16 vectors cannot be represented. */
13013 if (GET_MODE_INNER (mode
) == HFmode
)
13016 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13017 are distinct in this context. */
13018 if (!const_vec_duplicate_p (op
))
13022 *modconst
= CONST_VECTOR_ELT (op
, 0);
13027 if (el0
== CONST0_RTX (GET_MODE (el0
)))
13033 /* The tricks done in the code below apply for little-endian vector layout.
13034 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13035 FIXME: Implement logic for big-endian vectors. */
13036 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
13039 /* Splat vector constant out into a byte vector. */
13040 for (i
= 0; i
< n_elts
; i
++)
13042 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
13043 unsigned HOST_WIDE_INT elpart
;
13045 gcc_assert (CONST_INT_P (el
));
13046 elpart
= INTVAL (el
);
13048 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
13050 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
13051 elpart
>>= BITS_PER_UNIT
;
13055 /* Sanity check. */
13056 gcc_assert (idx
== GET_MODE_SIZE (mode
));
13060 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
13061 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13063 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
13064 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13066 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13067 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
13069 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13070 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
13072 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
13074 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
13076 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
13077 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13079 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
13080 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13082 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13083 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13085 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13086 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
13088 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
13090 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
13092 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
13093 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13095 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
13096 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13098 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13099 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
13101 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13102 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13104 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
13106 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
13107 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
13115 *elementwidth
= elsize
;
13119 unsigned HOST_WIDE_INT imm
= 0;
13121 /* Un-invert bytes of recognized vector, if necessary. */
13123 for (i
= 0; i
< idx
; i
++)
13124 bytes
[i
] ^= invmask
;
13128 /* FIXME: Broken on 32-bit H_W_I hosts. */
13129 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
13131 for (i
= 0; i
< 8; i
++)
13132 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
13133 << (i
* BITS_PER_UNIT
);
13135 *modconst
= GEN_INT (imm
);
13139 unsigned HOST_WIDE_INT imm
= 0;
13141 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
13142 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
13144 *modconst
= GEN_INT (imm
);
13152 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13153 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13154 (or zero for float elements), and a modified constant (whatever should be
13155 output for a VMOV) in *MODCONST. "neon_immediate_valid_for_move" function is
13156 modified to "simd_immediate_valid_for_move" as this function will be used
13157 both by neon and mve. */
13159 simd_immediate_valid_for_move (rtx op
, machine_mode mode
,
13160 rtx
*modconst
, int *elementwidth
)
13164 int retval
= simd_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
13170 *modconst
= tmpconst
;
13173 *elementwidth
= tmpwidth
;
13178 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13179 the immediate is valid, write a constant suitable for using as an operand
13180 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13181 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13184 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
13185 rtx
*modconst
, int *elementwidth
)
13189 int retval
= simd_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
13191 if (retval
< 0 || retval
> 5)
13195 *modconst
= tmpconst
;
13198 *elementwidth
= tmpwidth
;
13203 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13204 the immediate is valid, write a constant suitable for using as an operand
13205 to VSHR/VSHL to *MODCONST and the corresponding element width to
13206 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
13207 because they have different limitations. */
13210 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
13211 rtx
*modconst
, int *elementwidth
,
13214 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
13215 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
13216 unsigned HOST_WIDE_INT last_elt
= 0;
13217 unsigned HOST_WIDE_INT maxshift
;
13219 /* Split vector constant out into a byte vector. */
13220 for (i
= 0; i
< n_elts
; i
++)
13222 rtx el
= CONST_VECTOR_ELT (op
, i
);
13223 unsigned HOST_WIDE_INT elpart
;
13225 if (CONST_INT_P (el
))
13226 elpart
= INTVAL (el
);
13227 else if (CONST_DOUBLE_P (el
))
13230 gcc_unreachable ();
13232 if (i
!= 0 && elpart
!= last_elt
)
13238 /* Shift less than element size. */
13239 maxshift
= innersize
* 8;
13243 /* Left shift immediate value can be from 0 to <size>-1. */
13244 if (last_elt
>= maxshift
)
13249 /* Right shift immediate value can be from 1 to <size>. */
13250 if (last_elt
== 0 || last_elt
> maxshift
)
13255 *elementwidth
= innersize
* 8;
13258 *modconst
= CONST_VECTOR_ELT (op
, 0);
13263 /* Return a string suitable for output of Neon immediate logic operation
13267 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
13268 int inverse
, int quad
)
13270 int width
, is_valid
;
13271 static char templ
[40];
13273 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
13275 gcc_assert (is_valid
!= 0);
13278 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
13280 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
13285 /* Return a string suitable for output of Neon immediate shift operation
13286 (VSHR or VSHL) MNEM. */
13289 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
13290 machine_mode mode
, int quad
,
13293 int width
, is_valid
;
13294 static char templ
[40];
13296 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
13297 gcc_assert (is_valid
!= 0);
13300 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
13302 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
13307 /* Output a sequence of pairwise operations to implement a reduction.
13308 NOTE: We do "too much work" here, because pairwise operations work on two
13309 registers-worth of operands in one go. Unfortunately we can't exploit those
13310 extra calculations to do the full operation in fewer steps, I don't think.
13311 Although all vector elements of the result but the first are ignored, we
13312 actually calculate the same result in each of the elements. An alternative
13313 such as initially loading a vector with zero to use as each of the second
13314 operands would use up an additional register and take an extra instruction,
13315 for no particular gain. */
13318 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
13319 rtx (*reduc
) (rtx
, rtx
, rtx
))
13321 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
13324 for (i
= parts
/ 2; i
>= 1; i
/= 2)
13326 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
13327 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
13332 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13333 loaded into a register using VDUP.
13335 If this is the case, and GENERATE is set, we also generate
13336 instructions to do this and return an RTX to assign to the register. */
13339 neon_vdup_constant (rtx vals
, bool generate
)
13341 machine_mode mode
= GET_MODE (vals
);
13342 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13345 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
13348 if (!const_vec_duplicate_p (vals
, &x
))
13349 /* The elements are not all the same. We could handle repeating
13350 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13351 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13358 /* We can load this constant by using VDUP and a constant in a
13359 single ARM register. This will be cheaper than a vector
13362 x
= copy_to_mode_reg (inner_mode
, x
);
13363 return gen_vec_duplicate (mode
, x
);
13366 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13368 mve_bool_vec_to_const (rtx const_vec
)
13370 machine_mode mode
= GET_MODE (const_vec
);
13372 if (!VECTOR_MODE_P (mode
))
13375 unsigned n_elts
= GET_MODE_NUNITS (mode
);
13376 unsigned el_prec
= GET_MODE_PRECISION (GET_MODE_INNER (mode
));
13377 unsigned shift_c
= 16 / n_elts
;
13381 for (i
= 0; i
< n_elts
; i
++)
13383 rtx el
= CONST_VECTOR_ELT (const_vec
, i
);
13384 unsigned HOST_WIDE_INT elpart
;
13386 gcc_assert (CONST_INT_P (el
));
13387 elpart
= INTVAL (el
) & ((1U << el_prec
) - 1);
13389 unsigned index
= BYTES_BIG_ENDIAN
? n_elts
- i
- 1 : i
;
13391 hi_val
|= elpart
<< (index
* shift_c
);
13393 /* We are using mov immediate to encode this constant which writes 32-bits
13394 so we need to make sure the top 16-bits are all 0, otherwise we can't
13395 guarantee we can actually write this immediate. */
13396 return gen_int_mode (hi_val
, SImode
);
13399 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13400 constants (for vec_init) or CONST_VECTOR, can be effeciently loaded
13403 If this is the case, and GENERATE is set, we also generate code to do
13404 this and return an RTX to copy into the register. */
13407 neon_make_constant (rtx vals
, bool generate
)
13409 machine_mode mode
= GET_MODE (vals
);
13411 rtx const_vec
= NULL_RTX
;
13412 int n_elts
= GET_MODE_NUNITS (mode
);
13416 if (GET_CODE (vals
) == CONST_VECTOR
)
13418 else if (GET_CODE (vals
) == PARALLEL
)
13420 /* A CONST_VECTOR must contain only CONST_INTs and
13421 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13422 Only store valid constants in a CONST_VECTOR. */
13423 for (i
= 0; i
< n_elts
; ++i
)
13425 rtx x
= XVECEXP (vals
, 0, i
);
13426 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
13429 if (n_const
== n_elts
)
13430 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
13433 gcc_unreachable ();
13435 if (const_vec
!= NULL
13436 && simd_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
13437 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13439 else if (TARGET_HAVE_MVE
&& VALID_MVE_PRED_MODE(mode
))
13440 return mve_bool_vec_to_const (const_vec
);
13441 else if ((target
= neon_vdup_constant (vals
, generate
)) != NULL_RTX
)
13442 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13443 pipeline cycle; creating the constant takes one or two ARM
13444 pipeline cycles. */
13446 else if (const_vec
!= NULL_RTX
)
13447 /* Load from constant pool. On Cortex-A8 this takes two cycles
13448 (for either double or quad vectors). We cannot take advantage
13449 of single-cycle VLD1 because we need a PC-relative addressing
13451 return arm_disable_literal_pool
? NULL_RTX
: const_vec
;
13453 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13454 We cannot construct an initializer. */
13458 /* Initialize vector TARGET to VALS. */
13461 neon_expand_vector_init (rtx target
, rtx vals
)
13463 machine_mode mode
= GET_MODE (target
);
13464 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13465 int n_elts
= GET_MODE_NUNITS (mode
);
13466 int n_var
= 0, one_var
= -1;
13467 bool all_same
= true;
13471 for (i
= 0; i
< n_elts
; ++i
)
13473 x
= XVECEXP (vals
, 0, i
);
13474 if (!CONSTANT_P (x
))
13475 ++n_var
, one_var
= i
;
13477 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
13483 rtx constant
= neon_make_constant (vals
);
13484 if (constant
!= NULL_RTX
)
13486 emit_move_insn (target
, constant
);
13491 /* Splat a single non-constant element if we can. */
13492 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
13494 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
13495 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
13499 /* One field is non-constant. Load constant then overwrite varying
13500 field. This is more efficient than using the stack. */
13503 rtx copy
= copy_rtx (vals
);
13504 rtx merge_mask
= GEN_INT (1 << one_var
);
13506 /* Load constant part of vector, substitute neighboring value for
13507 varying element. */
13508 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
13509 neon_expand_vector_init (target
, copy
);
13511 /* Insert variable. */
13512 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
13513 emit_insn (gen_vec_set_internal (mode
, target
, x
, merge_mask
, target
));
13517 /* Construct the vector in memory one field at a time
13518 and load the whole vector. */
13519 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
13520 for (i
= 0; i
< n_elts
; i
++)
13521 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
13522 i
* GET_MODE_SIZE (inner_mode
)),
13523 XVECEXP (vals
, 0, i
));
13524 emit_move_insn (target
, mem
);
13527 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13528 ERR if it doesn't. EXP indicates the source location, which includes the
13529 inlining history for intrinsics. */
13532 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13533 const_tree exp
, const char *desc
)
13535 HOST_WIDE_INT lane
;
13537 gcc_assert (CONST_INT_P (operand
));
13539 lane
= INTVAL (operand
);
13541 if (lane
< low
|| lane
>= high
)
13544 error_at (EXPR_LOCATION (exp
),
13545 "%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13547 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13551 /* Bounds-check lanes. */
13554 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13557 bounds_check (operand
, low
, high
, exp
, "lane");
13560 /* Bounds-check constants. */
13563 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
13565 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
13569 neon_element_bits (machine_mode mode
)
13571 return GET_MODE_UNIT_BITSIZE (mode
);
13575 /* Predicates for `match_operand' and `match_operator'. */
13577 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13578 WB level is 2 if full writeback address modes are allowed, 1
13579 if limited writeback address modes (POST_INC and PRE_DEC) are
13580 allowed and 0 if no writeback at all is supported. */
13583 arm_coproc_mem_operand_wb (rtx op
, int wb_level
)
13585 gcc_assert (wb_level
== 0 || wb_level
== 1 || wb_level
== 2);
13588 /* Reject eliminable registers. */
13589 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
13590 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13591 || reg_mentioned_p (arg_pointer_rtx
, op
)
13592 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13593 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13594 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13595 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13598 /* Constants are converted into offsets from labels. */
13602 ind
= XEXP (op
, 0);
13604 if (reload_completed
13605 && (LABEL_REF_P (ind
)
13606 || (GET_CODE (ind
) == CONST
13607 && GET_CODE (XEXP (ind
, 0)) == PLUS
13608 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13609 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13612 /* Match: (mem (reg)). */
13614 return arm_address_register_rtx_p (ind
, 0);
13616 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
13617 acceptable in any case (subject to verification by
13618 arm_address_register_rtx_p). We need full writeback to accept
13619 PRE_INC and POST_DEC, and at least restricted writeback for
13620 PRE_INC and POST_DEC. */
13622 && (GET_CODE (ind
) == POST_INC
13623 || GET_CODE (ind
) == PRE_DEC
13625 && (GET_CODE (ind
) == PRE_INC
13626 || GET_CODE (ind
) == POST_DEC
))))
13627 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13630 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
13631 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
13632 && GET_CODE (XEXP (ind
, 1)) == PLUS
13633 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
13634 ind
= XEXP (ind
, 1);
13640 The encoded immediate for 16-bit modes is multiplied by 2,
13641 while the encoded immediate for 32-bit and 64-bit modes is
13642 multiplied by 4. */
13643 int factor
= MIN (GET_MODE_SIZE (GET_MODE (op
)), 4);
13644 if (GET_CODE (ind
) == PLUS
13645 && REG_P (XEXP (ind
, 0))
13646 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13647 && CONST_INT_P (XEXP (ind
, 1))
13648 && IN_RANGE (INTVAL (XEXP (ind
, 1)), -255 * factor
, 255 * factor
)
13649 && (INTVAL (XEXP (ind
, 1)) & (factor
- 1)) == 0)
13655 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13656 WB is true if full writeback address modes are allowed and is false
13657 if limited writeback address modes (POST_INC and PRE_DEC) are
13660 int arm_coproc_mem_operand (rtx op
, bool wb
)
13662 return arm_coproc_mem_operand_wb (op
, wb
? 2 : 1);
13665 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13666 context in which no writeback address modes are allowed. */
13669 arm_coproc_mem_operand_no_writeback (rtx op
)
13671 return arm_coproc_mem_operand_wb (op
, 0);
13674 /* In non-STRICT mode, return the register number; in STRICT mode return
13675 the hard regno or the replacement if it won't be a mem. Otherwise, return
13676 the original pseudo number. */
13678 arm_effective_regno (rtx op
, bool strict
)
13680 gcc_assert (REG_P (op
));
13681 if (!strict
|| REGNO (op
) < FIRST_PSEUDO_REGISTER
13682 || !reg_renumber
|| reg_renumber
[REGNO (op
)] < 0)
13684 return reg_renumber
[REGNO (op
)];
13687 /* This function returns TRUE on matching mode and op.
13688 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13689 2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect R13). */
13691 mve_vector_mem_operand (machine_mode mode
, rtx op
, bool strict
)
13693 enum rtx_code code
;
13696 /* Match: (mem (reg)). */
13699 reg_no
= arm_effective_regno (op
, strict
);
13700 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13701 ? reg_no
<= LAST_LO_REGNUM
13702 : reg_no
< LAST_ARM_REGNUM
)
13703 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13705 code
= GET_CODE (op
);
13707 if ((code
== POST_INC
13710 || code
== POST_DEC
)
13711 && REG_P (XEXP (op
, 0)))
13713 reg_no
= arm_effective_regno (XEXP (op
, 0), strict
);
13714 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13715 ? reg_no
<= LAST_LO_REGNUM
13716 :(reg_no
< LAST_ARM_REGNUM
&& reg_no
!= SP_REGNUM
))
13717 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13719 else if (((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
13720 && GET_CODE (XEXP (op
, 1)) == PLUS
13721 && XEXP (op
, 0) == XEXP (XEXP (op
, 1), 0)
13722 && REG_P (XEXP (op
, 0))
13723 && GET_CODE (XEXP (XEXP (op
, 1), 1)) == CONST_INT
)
13724 /* Make sure to only accept PLUS after reload_completed, otherwise
13725 this will interfere with auto_inc's pattern detection. */
13726 || (reload_completed
&& code
== PLUS
&& REG_P (XEXP (op
, 0))
13727 && GET_CODE (XEXP (op
, 1)) == CONST_INT
))
13729 reg_no
= arm_effective_regno (XEXP (op
, 0), strict
);
13731 val
= INTVAL (XEXP (op
, 1));
13733 val
= INTVAL (XEXP(XEXP (op
, 1), 1));
13740 if (abs (val
) > 127)
13747 if (val
% 2 != 0 || abs (val
) > 254)
13752 if (val
% 4 != 0 || abs (val
) > 508)
13758 return ((!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
)
13759 || (MVE_STN_LDW_MODE (mode
)
13760 ? reg_no
<= LAST_LO_REGNUM
13761 : (reg_no
< LAST_ARM_REGNUM
13762 && (code
== PLUS
|| reg_no
!= SP_REGNUM
))));
13767 /* Return TRUE if OP is a memory operand which we can load or store a vector
13768 to/from. TYPE is one of the following values:
13769 0 - Vector load/stor (vldr)
13770 1 - Core registers (ldm)
13771 2 - Element/structure loads (vld1)
13774 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
13778 /* Reject eliminable registers. */
13779 if (strict
&& ! (reload_in_progress
|| reload_completed
)
13780 && (reg_mentioned_p (frame_pointer_rtx
, op
)
13781 || reg_mentioned_p (arg_pointer_rtx
, op
)
13782 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13783 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13784 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13785 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13788 /* Constants are converted into offsets from labels. */
13792 ind
= XEXP (op
, 0);
13794 if (reload_completed
13795 && (LABEL_REF_P (ind
)
13796 || (GET_CODE (ind
) == CONST
13797 && GET_CODE (XEXP (ind
, 0)) == PLUS
13798 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13799 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13802 /* Match: (mem (reg)). */
13804 return arm_address_register_rtx_p (ind
, 0);
13806 /* Allow post-increment with Neon registers. */
13807 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13808 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13809 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13811 /* Allow post-increment by register for VLDn */
13812 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13813 && GET_CODE (XEXP (ind
, 1)) == PLUS
13814 && REG_P (XEXP (XEXP (ind
, 1), 1))
13815 && REG_P (XEXP (ind
, 0))
13816 && rtx_equal_p (XEXP (ind
, 0), XEXP (XEXP (ind
, 1), 0)))
13823 && GET_CODE (ind
) == PLUS
13824 && REG_P (XEXP (ind
, 0))
13825 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13826 && CONST_INT_P (XEXP (ind
, 1))
13827 && INTVAL (XEXP (ind
, 1)) > -1024
13828 /* For quad modes, we restrict the constant offset to be slightly less
13829 than what the instruction format permits. We have no such constraint
13830 on double mode offsets. (This must match arm_legitimate_index_p.) */
13831 && (INTVAL (XEXP (ind
, 1))
13832 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13833 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13839 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13842 mve_struct_mem_operand (rtx op
)
13844 rtx ind
= XEXP (op
, 0);
13846 /* Match: (mem (reg)). */
13848 return arm_address_register_rtx_p (ind
, 0);
13850 /* Allow only post-increment by the mode size. */
13851 if (GET_CODE (ind
) == POST_INC
)
13852 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13857 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13860 neon_struct_mem_operand (rtx op
)
13864 /* Reject eliminable registers. */
13865 if (! (reload_in_progress
|| reload_completed
)
13866 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13867 || reg_mentioned_p (arg_pointer_rtx
, op
)
13868 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13869 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13870 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13871 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13874 /* Constants are converted into offsets from labels. */
13878 ind
= XEXP (op
, 0);
13880 if (reload_completed
13881 && (LABEL_REF_P (ind
)
13882 || (GET_CODE (ind
) == CONST
13883 && GET_CODE (XEXP (ind
, 0)) == PLUS
13884 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13885 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13888 /* Match: (mem (reg)). */
13890 return arm_address_register_rtx_p (ind
, 0);
13892 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13893 if (GET_CODE (ind
) == POST_INC
13894 || GET_CODE (ind
) == PRE_DEC
)
13895 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13900 /* Prepares the operands for the VCMLA by lane instruction such that the right
13901 register number is selected. This instruction is special in that it always
13902 requires a D register, however there is a choice to be made between Dn[0],
13903 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13905 The VCMLA by lane function always selects two values. For instance given D0
13906 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13907 used by the instruction. However given V4SF then index 0 and 1 are valid as
13908 D0[0] or D1[0] are both valid.
13910 This function centralizes that information based on OPERANDS, OPERANDS[3]
13911 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13912 updated to contain the right index. */
13915 neon_vcmla_lane_prepare_operands (rtx
*operands
)
13917 int lane
= INTVAL (operands
[4]);
13918 machine_mode constmode
= SImode
;
13919 machine_mode mode
= GET_MODE (operands
[3]);
13920 int regno
= REGNO (operands
[3]);
13921 regno
= ((regno
- FIRST_VFP_REGNUM
) >> 1);
13922 if (lane
> 0 && lane
>= GET_MODE_NUNITS (mode
) / 4)
13924 operands
[3] = gen_int_mode (regno
+ 1, constmode
);
13926 = gen_int_mode (lane
- GET_MODE_NUNITS (mode
) / 4, constmode
);
13930 operands
[3] = gen_int_mode (regno
, constmode
);
13931 operands
[4] = gen_int_mode (lane
, constmode
);
13937 /* Return true if X is a register that will be eliminated later on. */
13939 arm_eliminable_register (rtx x
)
13941 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13942 || REGNO (x
) == ARG_POINTER_REGNUM
13943 || VIRTUAL_REGISTER_P (x
));
13946 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13947 coprocessor registers. Otherwise return NO_REGS. */
13950 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13952 if (mode
== HFmode
)
13954 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
13955 return GENERAL_REGS
;
13956 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13958 return GENERAL_REGS
;
13961 /* The neon move patterns handle all legitimate vector and struct
13964 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13965 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13966 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13967 || VALID_NEON_STRUCT_MODE (mode
)))
13970 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13973 return GENERAL_REGS
;
13976 /* Values which must be returned in the most-significant end of the return
13980 arm_return_in_msb (const_tree valtype
)
13982 return (TARGET_AAPCS_BASED
13983 && BYTES_BIG_ENDIAN
13984 && (AGGREGATE_TYPE_P (valtype
)
13985 || TREE_CODE (valtype
) == COMPLEX_TYPE
13986 || FIXED_POINT_TYPE_P (valtype
)));
13989 /* Return TRUE if X references a SYMBOL_REF. */
13991 symbol_mentioned_p (rtx x
)
13996 if (SYMBOL_REF_P (x
))
13999 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
14000 are constant offsets, not symbols. */
14001 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
14004 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
14006 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
14012 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
14013 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
14016 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
14023 /* Return TRUE if X references a LABEL_REF. */
14025 label_mentioned_p (rtx x
)
14030 if (LABEL_REF_P (x
))
14033 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14034 instruction, but they are constant offsets, not symbols. */
14035 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
14038 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
14039 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
14045 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
14046 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
14049 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
14057 tls_mentioned_p (rtx x
)
14059 switch (GET_CODE (x
))
14062 return tls_mentioned_p (XEXP (x
, 0));
14065 if (XINT (x
, 1) == UNSPEC_TLS
)
14068 /* Fall through. */
14074 /* Must not copy any rtx that uses a pc-relative address.
14075 Also, disallow copying of load-exclusive instructions that
14076 may appear after splitting of compare-and-swap-style operations
14077 so as to prevent those loops from being transformed away from their
14078 canonical forms (see PR 69904). */
14081 arm_cannot_copy_insn_p (rtx_insn
*insn
)
14083 /* The tls call insn cannot be copied, as it is paired with a data
14085 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
14088 subrtx_iterator::array_type array
;
14089 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
14091 const_rtx x
= *iter
;
14092 if (GET_CODE (x
) == UNSPEC
14093 && (XINT (x
, 1) == UNSPEC_PIC_BASE
14094 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
14098 rtx set
= single_set (insn
);
14101 rtx src
= SET_SRC (set
);
14102 if (GET_CODE (src
) == ZERO_EXTEND
)
14103 src
= XEXP (src
, 0);
14105 /* Catch the load-exclusive and load-acquire operations. */
14106 if (GET_CODE (src
) == UNSPEC_VOLATILE
14107 && (XINT (src
, 1) == VUNSPEC_LL
14108 || XINT (src
, 1) == VUNSPEC_LAX
))
14115 minmax_code (rtx x
)
14117 enum rtx_code code
= GET_CODE (x
);
14130 gcc_unreachable ();
14134 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14137 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
14138 int *mask
, bool *signed_sat
)
14140 /* The high bound must be a power of two minus one. */
14141 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
14145 /* The low bound is either zero (for usat) or one less than the
14146 negation of the high bound (for ssat). */
14147 if (INTVAL (lo_bound
) == 0)
14152 *signed_sat
= false;
14157 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
14162 *signed_sat
= true;
14170 /* Return 1 if memory locations are adjacent. */
14172 adjacent_mem_locations (rtx a
, rtx b
)
14174 /* We don't guarantee to preserve the order of these memory refs. */
14175 if (volatile_refs_p (a
) || volatile_refs_p (b
))
14178 if ((REG_P (XEXP (a
, 0))
14179 || (GET_CODE (XEXP (a
, 0)) == PLUS
14180 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
14181 && (REG_P (XEXP (b
, 0))
14182 || (GET_CODE (XEXP (b
, 0)) == PLUS
14183 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
14185 HOST_WIDE_INT val0
= 0, val1
= 0;
14189 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
14191 reg0
= XEXP (XEXP (a
, 0), 0);
14192 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
14195 reg0
= XEXP (a
, 0);
14197 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
14199 reg1
= XEXP (XEXP (b
, 0), 0);
14200 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
14203 reg1
= XEXP (b
, 0);
14205 /* Don't accept any offset that will require multiple
14206 instructions to handle, since this would cause the
14207 arith_adjacentmem pattern to output an overlong sequence. */
14208 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
14211 /* Don't allow an eliminable register: register elimination can make
14212 the offset too large. */
14213 if (arm_eliminable_register (reg0
))
14216 val_diff
= val1
- val0
;
14220 /* If the target has load delay slots, then there's no benefit
14221 to using an ldm instruction unless the offset is zero and
14222 we are optimizing for size. */
14223 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
14224 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
14225 && (val_diff
== 4 || val_diff
== -4));
14228 return ((REGNO (reg0
) == REGNO (reg1
))
14229 && (val_diff
== 4 || val_diff
== -4));
14235 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14236 for load operations, false for store operations. CONSECUTIVE is true
14237 if the register numbers in the operation must be consecutive in the register
14238 bank. RETURN_PC is true if value is to be loaded in PC.
14239 The pattern we are trying to match for load is:
14240 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14241 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14244 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14247 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14248 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14249 3. If consecutive is TRUE, then for kth register being loaded,
14250 REGNO (R_dk) = REGNO (R_d0) + k.
14251 The pattern for store is similar. */
14253 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
14254 bool consecutive
, bool return_pc
)
14256 HOST_WIDE_INT count
= XVECLEN (op
, 0);
14257 rtx reg
, mem
, addr
;
14259 unsigned first_regno
;
14260 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
14262 bool addr_reg_in_reglist
= false;
14263 bool update
= false;
14268 /* If not in SImode, then registers must be consecutive
14269 (e.g., VLDM instructions for DFmode). */
14270 gcc_assert ((mode
== SImode
) || consecutive
);
14271 /* Setting return_pc for stores is illegal. */
14272 gcc_assert (!return_pc
|| load
);
14274 /* Set up the increments and the regs per val based on the mode. */
14275 reg_increment
= GET_MODE_SIZE (mode
);
14276 regs_per_val
= reg_increment
/ 4;
14277 offset_adj
= return_pc
? 1 : 0;
14280 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
14281 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
14284 /* Check if this is a write-back. */
14285 elt
= XVECEXP (op
, 0, offset_adj
);
14286 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
14292 /* The offset adjustment must be the number of registers being
14293 popped times the size of a single register. */
14294 if (!REG_P (SET_DEST (elt
))
14295 || !REG_P (XEXP (SET_SRC (elt
), 0))
14296 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
14297 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
14298 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
14299 ((count
- 1 - offset_adj
) * reg_increment
))
14303 i
= i
+ offset_adj
;
14304 base
= base
+ offset_adj
;
14305 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14306 success depends on the type: VLDM can do just one reg,
14307 LDM must do at least two. */
14308 if ((count
<= i
) && (mode
== SImode
))
14311 elt
= XVECEXP (op
, 0, i
- 1);
14312 if (GET_CODE (elt
) != SET
)
14317 reg
= SET_DEST (elt
);
14318 mem
= SET_SRC (elt
);
14322 reg
= SET_SRC (elt
);
14323 mem
= SET_DEST (elt
);
14326 if (!REG_P (reg
) || !MEM_P (mem
))
14329 regno
= REGNO (reg
);
14330 first_regno
= regno
;
14331 addr
= XEXP (mem
, 0);
14332 if (GET_CODE (addr
) == PLUS
)
14334 if (!CONST_INT_P (XEXP (addr
, 1)))
14337 offset
= INTVAL (XEXP (addr
, 1));
14338 addr
= XEXP (addr
, 0);
14344 /* Don't allow SP to be loaded unless it is also the base register. It
14345 guarantees that SP is reset correctly when an LDM instruction
14346 is interrupted. Otherwise, we might end up with a corrupt stack. */
14347 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
14350 if (regno
== REGNO (addr
))
14351 addr_reg_in_reglist
= true;
14353 for (; i
< count
; i
++)
14355 elt
= XVECEXP (op
, 0, i
);
14356 if (GET_CODE (elt
) != SET
)
14361 reg
= SET_DEST (elt
);
14362 mem
= SET_SRC (elt
);
14366 reg
= SET_SRC (elt
);
14367 mem
= SET_DEST (elt
);
14371 || GET_MODE (reg
) != mode
14372 || REGNO (reg
) <= regno
14375 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
14376 /* Don't allow SP to be loaded unless it is also the base register. It
14377 guarantees that SP is reset correctly when an LDM instruction
14378 is interrupted. Otherwise, we might end up with a corrupt stack. */
14379 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
14381 || GET_MODE (mem
) != mode
14382 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
14383 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
14384 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
14385 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
14386 offset
+ (i
- base
) * reg_increment
))
14387 && (!REG_P (XEXP (mem
, 0))
14388 || offset
+ (i
- base
) * reg_increment
!= 0)))
14391 regno
= REGNO (reg
);
14392 if (regno
== REGNO (addr
))
14393 addr_reg_in_reglist
= true;
14398 if (update
&& addr_reg_in_reglist
)
14401 /* For Thumb-1, address register is always modified - either by write-back
14402 or by explicit load. If the pattern does not describe an update,
14403 then the address register must be in the list of loaded registers. */
14405 return update
|| addr_reg_in_reglist
;
14411 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14412 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14415 [(set (reg:SI <N>) (const_int 0))
14416 (set (reg:SI <M>) (const_int 0))
14418 (unspec_volatile [(const_int 0)]
14420 (clobber (reg:CC CC_REGNUM))
14423 Any number (including 0) of set expressions is valid, the volatile unspec is
14424 optional. All registers but SP and PC are allowed and registers must be in
14425 strict increasing order.
14427 To be a valid VSCCLRM pattern, OP must have the following form:
14429 [(unspec_volatile [(const_int 0)]
14430 VUNSPEC_VSCCLRM_VPR)
14431 (set (reg:SF <N>) (const_int 0))
14432 (set (reg:SF <M>) (const_int 0))
14436 As with CLRM, any number (including 0) of set expressions is valid, however
14437 the volatile unspec is mandatory here. Any VFP single-precision register is
14438 accepted but all registers must be consecutive and in increasing order. */
14441 clear_operation_p (rtx op
, bool vfp
)
14444 unsigned last_regno
= INVALID_REGNUM
;
14445 rtx elt
, reg
, zero
;
14446 int count
= XVECLEN (op
, 0);
14447 int first_set
= vfp
? 1 : 0;
14448 machine_mode expected_mode
= vfp
? E_SFmode
: E_SImode
;
14450 for (int i
= first_set
; i
< count
; i
++)
14452 elt
= XVECEXP (op
, 0, i
);
14454 if (!vfp
&& GET_CODE (elt
) == UNSPEC_VOLATILE
)
14456 if (XINT (elt
, 1) != VUNSPEC_CLRM_APSR
14457 || XVECLEN (elt
, 0) != 1
14458 || XVECEXP (elt
, 0, 0) != CONST0_RTX (SImode
)
14465 if (GET_CODE (elt
) == CLOBBER
)
14468 if (GET_CODE (elt
) != SET
)
14471 reg
= SET_DEST (elt
);
14472 zero
= SET_SRC (elt
);
14475 || GET_MODE (reg
) != expected_mode
14476 || zero
!= CONST0_RTX (SImode
))
14479 regno
= REGNO (reg
);
14483 if (i
!= first_set
&& regno
!= last_regno
+ 1)
14488 if (regno
== SP_REGNUM
|| regno
== PC_REGNUM
)
14490 if (i
!= first_set
&& regno
<= last_regno
)
14494 last_regno
= regno
;
14500 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14501 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14502 instruction. ADD_OFFSET is nonzero if the base address register needs
14503 to be modified with an add instruction before we can use it. */
14506 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
14507 int nops
, HOST_WIDE_INT add_offset
)
14509 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14510 if the offset isn't small enough. The reason 2 ldrs are faster
14511 is because these ARMs are able to do more than one cache access
14512 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14513 whilst the ARM8 has a double bandwidth cache. This means that
14514 these cores can do both an instruction fetch and a data fetch in
14515 a single cycle, so the trick of calculating the address into a
14516 scratch register (one of the result regs) and then doing a load
14517 multiple actually becomes slower (and no smaller in code size).
14518 That is the transformation
14520 ldr rd1, [rbase + offset]
14521 ldr rd2, [rbase + offset + 4]
14525 add rd1, rbase, offset
14526 ldmia rd1, {rd1, rd2}
14528 produces worse code -- '3 cycles + any stalls on rd2' instead of
14529 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14530 access per cycle, the first sequence could never complete in less
14531 than 6 cycles, whereas the ldm sequence would only take 5 and
14532 would make better use of sequential accesses if not hitting the
14535 We cheat here and test 'arm_ld_sched' which we currently know to
14536 only be true for the ARM8, ARM9 and StrongARM. If this ever
14537 changes, then the test below needs to be reworked. */
14538 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
14541 /* XScale has load-store double instructions, but they have stricter
14542 alignment requirements than load-store multiple, so we cannot
14545 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14546 the pipeline until completion.
14554 An ldr instruction takes 1-3 cycles, but does not block the
14563 Best case ldr will always win. However, the more ldr instructions
14564 we issue, the less likely we are to be able to schedule them well.
14565 Using ldr instructions also increases code size.
14567 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14568 for counts of 3 or 4 regs. */
14569 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
14574 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14575 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14576 an array ORDER which describes the sequence to use when accessing the
14577 offsets that produces an ascending order. In this sequence, each
14578 offset must be larger by exactly 4 than the previous one. ORDER[0]
14579 must have been filled in with the lowest offset by the caller.
14580 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14581 we use to verify that ORDER produces an ascending order of registers.
14582 Return true if it was possible to construct such an order, false if
14586 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
14587 int *unsorted_regs
)
14590 for (i
= 1; i
< nops
; i
++)
14594 order
[i
] = order
[i
- 1];
14595 for (j
= 0; j
< nops
; j
++)
14596 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
14598 /* We must find exactly one offset that is higher than the
14599 previous one by 4. */
14600 if (order
[i
] != order
[i
- 1])
14604 if (order
[i
] == order
[i
- 1])
14606 /* The register numbers must be ascending. */
14607 if (unsorted_regs
!= NULL
14608 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
14614 /* Used to determine in a peephole whether a sequence of load
14615 instructions can be changed into a load-multiple instruction.
14616 NOPS is the number of separate load instructions we are examining. The
14617 first NOPS entries in OPERANDS are the destination registers, the
14618 next NOPS entries are memory operands. If this function is
14619 successful, *BASE is set to the common base register of the memory
14620 accesses; *LOAD_OFFSET is set to the first memory location's offset
14621 from that base register.
14622 REGS is an array filled in with the destination register numbers.
14623 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14624 insn numbers to an ascending order of stores. If CHECK_REGS is true,
14625 the sequence of registers in REGS matches the loads from ascending memory
14626 locations, and the function verifies that the register numbers are
14627 themselves ascending. If CHECK_REGS is false, the register numbers
14628 are stored in the order they are found in the operands. */
14630 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
14631 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
14633 int unsorted_regs
[MAX_LDM_STM_OPS
];
14634 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
14635 int order
[MAX_LDM_STM_OPS
];
14639 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14640 easily extended if required. */
14641 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
14643 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
14645 /* Loop over the operands and check that the memory references are
14646 suitable (i.e. immediate offsets from the same base register). At
14647 the same time, extract the target register, and the memory
14649 for (i
= 0; i
< nops
; i
++)
14654 /* Convert a subreg of a mem into the mem itself. */
14655 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
14656 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
14658 gcc_assert (MEM_P (operands
[nops
+ i
]));
14660 /* Don't reorder volatile memory references; it doesn't seem worth
14661 looking for the case where the order is ok anyway. */
14662 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
14665 offset
= const0_rtx
;
14667 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
14669 && REG_P (reg
= SUBREG_REG (reg
))))
14670 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
14671 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
14673 && REG_P (reg
= SUBREG_REG (reg
))))
14674 && (CONST_INT_P (offset
14675 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
14679 base_reg
= REGNO (reg
);
14680 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
14683 else if (base_reg
!= (int) REGNO (reg
))
14684 /* Not addressed from the same base register. */
14687 unsorted_regs
[i
] = (REG_P (operands
[i
])
14688 ? REGNO (operands
[i
])
14689 : REGNO (SUBREG_REG (operands
[i
])));
14691 /* If it isn't an integer register, or if it overwrites the
14692 base register but isn't the last insn in the list, then
14693 we can't do this. */
14694 if (unsorted_regs
[i
] < 0
14695 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
14696 || unsorted_regs
[i
] > 14
14697 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
14700 /* Don't allow SP to be loaded unless it is also the base
14701 register. It guarantees that SP is reset correctly when
14702 an LDM instruction is interrupted. Otherwise, we might
14703 end up with a corrupt stack. */
14704 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
14707 unsorted_offsets
[i
] = INTVAL (offset
);
14708 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
14712 /* Not a suitable memory address. */
14716 /* All the useful information has now been extracted from the
14717 operands into unsorted_regs and unsorted_offsets; additionally,
14718 order[0] has been set to the lowest offset in the list. Sort
14719 the offsets into order, verifying that they are adjacent, and
14720 check that the register numbers are ascending. */
14721 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
14722 check_regs
? unsorted_regs
: NULL
))
14726 memcpy (saved_order
, order
, sizeof order
);
14732 for (i
= 0; i
< nops
; i
++)
14733 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
14735 *load_offset
= unsorted_offsets
[order
[0]];
14738 if (unsorted_offsets
[order
[0]] == 0)
14739 ldm_case
= 1; /* ldmia */
14740 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14741 ldm_case
= 2; /* ldmib */
14742 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14743 ldm_case
= 3; /* ldmda */
14744 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14745 ldm_case
= 4; /* ldmdb */
14746 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
14747 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
14752 if (!multiple_operation_profitable_p (false, nops
,
14754 ? unsorted_offsets
[order
[0]] : 0))
14760 /* Used to determine in a peephole whether a sequence of store instructions can
14761 be changed into a store-multiple instruction.
14762 NOPS is the number of separate store instructions we are examining.
14763 NOPS_TOTAL is the total number of instructions recognized by the peephole
14765 The first NOPS entries in OPERANDS are the source registers, the next
14766 NOPS entries are memory operands. If this function is successful, *BASE is
14767 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14768 to the first memory location's offset from that base register. REGS is an
14769 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14770 likewise filled with the corresponding rtx's.
14771 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14772 numbers to an ascending order of stores.
14773 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14774 from ascending memory locations, and the function verifies that the register
14775 numbers are themselves ascending. If CHECK_REGS is false, the register
14776 numbers are stored in the order they are found in the operands. */
14778 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
14779 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
14780 HOST_WIDE_INT
*load_offset
, bool check_regs
)
14782 int unsorted_regs
[MAX_LDM_STM_OPS
];
14783 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
14784 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
14785 int order
[MAX_LDM_STM_OPS
];
14787 rtx base_reg_rtx
= NULL
;
14790 /* Write back of base register is currently only supported for Thumb 1. */
14791 int base_writeback
= TARGET_THUMB1
;
14793 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14794 easily extended if required. */
14795 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
14797 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
14799 /* Loop over the operands and check that the memory references are
14800 suitable (i.e. immediate offsets from the same base register). At
14801 the same time, extract the target register, and the memory
14803 for (i
= 0; i
< nops
; i
++)
14808 /* Convert a subreg of a mem into the mem itself. */
14809 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
14810 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
14812 gcc_assert (MEM_P (operands
[nops
+ i
]));
14814 /* Don't reorder volatile memory references; it doesn't seem worth
14815 looking for the case where the order is ok anyway. */
14816 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
14819 offset
= const0_rtx
;
14821 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
14823 && REG_P (reg
= SUBREG_REG (reg
))))
14824 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
14825 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
14827 && REG_P (reg
= SUBREG_REG (reg
))))
14828 && (CONST_INT_P (offset
14829 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
14831 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
14832 ? operands
[i
] : SUBREG_REG (operands
[i
]));
14833 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
14837 base_reg
= REGNO (reg
);
14838 base_reg_rtx
= reg
;
14839 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
14842 else if (base_reg
!= (int) REGNO (reg
))
14843 /* Not addressed from the same base register. */
14846 /* If it isn't an integer register, then we can't do this. */
14847 if (unsorted_regs
[i
] < 0
14848 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
14849 /* The effects are unpredictable if the base register is
14850 both updated and stored. */
14851 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
14852 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
14853 || unsorted_regs
[i
] > 14)
14856 unsorted_offsets
[i
] = INTVAL (offset
);
14857 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
14861 /* Not a suitable memory address. */
14865 /* All the useful information has now been extracted from the
14866 operands into unsorted_regs and unsorted_offsets; additionally,
14867 order[0] has been set to the lowest offset in the list. Sort
14868 the offsets into order, verifying that they are adjacent, and
14869 check that the register numbers are ascending. */
14870 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
14871 check_regs
? unsorted_regs
: NULL
))
14875 memcpy (saved_order
, order
, sizeof order
);
14881 for (i
= 0; i
< nops
; i
++)
14883 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
14885 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
14888 *load_offset
= unsorted_offsets
[order
[0]];
14892 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
14895 if (unsorted_offsets
[order
[0]] == 0)
14896 stm_case
= 1; /* stmia */
14897 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14898 stm_case
= 2; /* stmib */
14899 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14900 stm_case
= 3; /* stmda */
14901 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14902 stm_case
= 4; /* stmdb */
14906 if (!multiple_operation_profitable_p (false, nops
, 0))
14912 /* Routines for use in generating RTL. */
14914 /* Generate a load-multiple instruction. COUNT is the number of loads in
14915 the instruction; REGS and MEMS are arrays containing the operands.
14916 BASEREG is the base register to be used in addressing the memory operands.
14917 WBACK_OFFSET is nonzero if the instruction should update the base
14921 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14922 HOST_WIDE_INT wback_offset
)
14927 if (!multiple_operation_profitable_p (false, count
, 0))
14933 for (i
= 0; i
< count
; i
++)
14934 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
14936 if (wback_offset
!= 0)
14937 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14939 seq
= get_insns ();
14945 result
= gen_rtx_PARALLEL (VOIDmode
,
14946 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14947 if (wback_offset
!= 0)
14949 XVECEXP (result
, 0, 0)
14950 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14955 for (j
= 0; i
< count
; i
++, j
++)
14956 XVECEXP (result
, 0, i
)
14957 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
14962 /* Generate a store-multiple instruction. COUNT is the number of stores in
14963 the instruction; REGS and MEMS are arrays containing the operands.
14964 BASEREG is the base register to be used in addressing the memory operands.
14965 WBACK_OFFSET is nonzero if the instruction should update the base
14969 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14970 HOST_WIDE_INT wback_offset
)
14975 if (GET_CODE (basereg
) == PLUS
)
14976 basereg
= XEXP (basereg
, 0);
14978 if (!multiple_operation_profitable_p (false, count
, 0))
14984 for (i
= 0; i
< count
; i
++)
14985 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14987 if (wback_offset
!= 0)
14988 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14990 seq
= get_insns ();
14996 result
= gen_rtx_PARALLEL (VOIDmode
,
14997 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14998 if (wback_offset
!= 0)
15000 XVECEXP (result
, 0, 0)
15001 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
15006 for (j
= 0; i
< count
; i
++, j
++)
15007 XVECEXP (result
, 0, i
)
15008 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
15013 /* Generate either a load-multiple or a store-multiple instruction. This
15014 function can be used in situations where we can start with a single MEM
15015 rtx and adjust its address upwards.
15016 COUNT is the number of operations in the instruction, not counting a
15017 possible update of the base register. REGS is an array containing the
15019 BASEREG is the base register to be used in addressing the memory operands,
15020 which are constructed from BASEMEM.
15021 WRITE_BACK specifies whether the generated instruction should include an
15022 update of the base register.
15023 OFFSETP is used to pass an offset to and from this function; this offset
15024 is not used when constructing the address (instead BASEMEM should have an
15025 appropriate offset in its address), it is used only for setting
15026 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
15029 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
15030 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
15032 rtx mems
[MAX_LDM_STM_OPS
];
15033 HOST_WIDE_INT offset
= *offsetp
;
15036 gcc_assert (count
<= MAX_LDM_STM_OPS
);
15038 if (GET_CODE (basereg
) == PLUS
)
15039 basereg
= XEXP (basereg
, 0);
15041 for (i
= 0; i
< count
; i
++)
15043 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
15044 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
15052 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
15053 write_back
? 4 * count
: 0);
15055 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
15056 write_back
? 4 * count
: 0);
15060 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
15061 rtx basemem
, HOST_WIDE_INT
*offsetp
)
15063 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
15068 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
15069 rtx basemem
, HOST_WIDE_INT
*offsetp
)
15071 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
15075 /* Called from a peephole2 expander to turn a sequence of loads into an
15076 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15077 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15078 is true if we can reorder the registers because they are used commutatively
15080 Returns true iff we could generate a new instruction. */
15083 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
15085 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15086 rtx mems
[MAX_LDM_STM_OPS
];
15087 int i
, j
, base_reg
;
15089 HOST_WIDE_INT offset
;
15090 int write_back
= FALSE
;
15094 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
15095 &base_reg
, &offset
, !sort_regs
);
15101 for (i
= 0; i
< nops
- 1; i
++)
15102 for (j
= i
+ 1; j
< nops
; j
++)
15103 if (regs
[i
] > regs
[j
])
15109 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15113 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
15115 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15117 for (i
= 0; i
< nops
; i
++)
15118 if (base_reg
== regs
[i
])
15119 write_back
= false;
15121 /* Ensure the base is dead if it is updated. */
15122 if (write_back
&& !peep2_reg_dead_p (nops
, base_reg_rtx
))
15128 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
15129 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
15131 base_reg_rtx
= newbase
;
15134 for (i
= 0; i
< nops
; i
++)
15136 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15137 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15140 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
15141 write_back
? offset
+ i
* 4 : 0));
15145 /* Called from a peephole2 expander to turn a sequence of stores into an
15146 STM instruction. OPERANDS are the operands found by the peephole matcher;
15147 NOPS indicates how many separate stores we are trying to combine.
15148 Returns true iff we could generate a new instruction. */
15151 gen_stm_seq (rtx
*operands
, int nops
)
15154 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15155 rtx mems
[MAX_LDM_STM_OPS
];
15158 HOST_WIDE_INT offset
;
15159 int write_back
= FALSE
;
15162 bool base_reg_dies
;
15164 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
15165 mem_order
, &base_reg
, &offset
, true);
15170 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15172 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
15175 gcc_assert (base_reg_dies
);
15181 gcc_assert (base_reg_dies
);
15182 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
15186 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
15188 for (i
= 0; i
< nops
; i
++)
15190 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15191 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15194 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
15195 write_back
? offset
+ i
* 4 : 0));
15199 /* Called from a peephole2 expander to turn a sequence of stores that are
15200 preceded by constant loads into an STM instruction. OPERANDS are the
15201 operands found by the peephole matcher; NOPS indicates how many
15202 separate stores we are trying to combine; there are 2 * NOPS
15203 instructions in the peephole.
15204 Returns true iff we could generate a new instruction. */
15207 gen_const_stm_seq (rtx
*operands
, int nops
)
15209 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
15210 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15211 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
15212 rtx mems
[MAX_LDM_STM_OPS
];
15215 HOST_WIDE_INT offset
;
15216 int write_back
= FALSE
;
15219 bool base_reg_dies
;
15221 HARD_REG_SET allocated
;
15223 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
15224 mem_order
, &base_reg
, &offset
, false);
15229 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
15231 /* If the same register is used more than once, try to find a free
15233 CLEAR_HARD_REG_SET (allocated
);
15234 for (i
= 0; i
< nops
; i
++)
15236 for (j
= i
+ 1; j
< nops
; j
++)
15237 if (regs
[i
] == regs
[j
])
15239 rtx t
= peep2_find_free_register (0, nops
* 2,
15240 TARGET_THUMB1
? "l" : "r",
15241 SImode
, &allocated
);
15245 regs
[i
] = REGNO (t
);
15249 /* Compute an ordering that maps the register numbers to an ascending
15252 for (i
= 0; i
< nops
; i
++)
15253 if (regs
[i
] < regs
[reg_order
[0]])
15256 for (i
= 1; i
< nops
; i
++)
15258 int this_order
= reg_order
[i
- 1];
15259 for (j
= 0; j
< nops
; j
++)
15260 if (regs
[j
] > regs
[reg_order
[i
- 1]]
15261 && (this_order
== reg_order
[i
- 1]
15262 || regs
[j
] < regs
[this_order
]))
15264 reg_order
[i
] = this_order
;
15267 /* Ensure that registers that must be live after the instruction end
15268 up with the correct value. */
15269 for (i
= 0; i
< nops
; i
++)
15271 int this_order
= reg_order
[i
];
15272 if ((this_order
!= mem_order
[i
]
15273 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
15274 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
15278 /* Load the constants. */
15279 for (i
= 0; i
< nops
; i
++)
15281 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
15282 sorted_regs
[i
] = regs
[reg_order
[i
]];
15283 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
15286 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15288 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
15291 gcc_assert (base_reg_dies
);
15297 gcc_assert (base_reg_dies
);
15298 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
15302 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
15304 for (i
= 0; i
< nops
; i
++)
15306 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15307 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15310 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
15311 write_back
? offset
+ i
* 4 : 0));
15315 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15316 unaligned copies on processors which support unaligned semantics for those
15317 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15318 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15319 An interleave factor of 1 (the minimum) will perform no interleaving.
15320 Load/store multiple are used for aligned addresses where possible. */
15323 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
15324 HOST_WIDE_INT length
,
15325 unsigned int interleave_factor
)
15327 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
15328 int *regnos
= XALLOCAVEC (int, interleave_factor
);
15329 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
15330 HOST_WIDE_INT i
, j
;
15331 HOST_WIDE_INT remaining
= length
, words
;
15332 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
15334 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
15335 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
15336 HOST_WIDE_INT srcoffset
, dstoffset
;
15337 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
15340 gcc_assert (interleave_factor
>= 1 && interleave_factor
<= 4);
15342 /* Use hard registers if we have aligned source or destination so we can use
15343 load/store multiple with contiguous registers. */
15344 if (dst_aligned
|| src_aligned
)
15345 for (i
= 0; i
< interleave_factor
; i
++)
15346 regs
[i
] = gen_rtx_REG (SImode
, i
);
15348 for (i
= 0; i
< interleave_factor
; i
++)
15349 regs
[i
] = gen_reg_rtx (SImode
);
15351 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
15352 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
15354 srcoffset
= dstoffset
= 0;
15356 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15357 For copying the last bytes we want to subtract this offset again. */
15358 src_autoinc
= dst_autoinc
= 0;
15360 for (i
= 0; i
< interleave_factor
; i
++)
15363 /* Copy BLOCK_SIZE_BYTES chunks. */
15365 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
15368 if (src_aligned
&& interleave_factor
> 1)
15370 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
15371 TRUE
, srcbase
, &srcoffset
));
15372 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15376 for (j
= 0; j
< interleave_factor
; j
++)
15378 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
15380 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15381 srcoffset
+ j
* UNITS_PER_WORD
);
15382 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15384 srcoffset
+= block_size_bytes
;
15388 if (dst_aligned
&& interleave_factor
> 1)
15390 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
15391 TRUE
, dstbase
, &dstoffset
));
15392 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15396 for (j
= 0; j
< interleave_factor
; j
++)
15398 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
15400 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15401 dstoffset
+ j
* UNITS_PER_WORD
);
15402 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15404 dstoffset
+= block_size_bytes
;
15407 remaining
-= block_size_bytes
;
15410 /* Copy any whole words left (note these aren't interleaved with any
15411 subsequent halfword/byte load/stores in the interests of simplicity). */
15413 words
= remaining
/ UNITS_PER_WORD
;
15415 gcc_assert (words
< interleave_factor
);
15417 if (src_aligned
&& words
> 1)
15419 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
15421 src_autoinc
+= UNITS_PER_WORD
* words
;
15425 for (j
= 0; j
< words
; j
++)
15427 addr
= plus_constant (Pmode
, src
,
15428 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
15429 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15430 srcoffset
+ j
* UNITS_PER_WORD
);
15432 emit_move_insn (regs
[j
], mem
);
15434 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15436 srcoffset
+= words
* UNITS_PER_WORD
;
15439 if (dst_aligned
&& words
> 1)
15441 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
15443 dst_autoinc
+= words
* UNITS_PER_WORD
;
15447 for (j
= 0; j
< words
; j
++)
15449 addr
= plus_constant (Pmode
, dst
,
15450 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
15451 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15452 dstoffset
+ j
* UNITS_PER_WORD
);
15454 emit_move_insn (mem
, regs
[j
]);
15456 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15458 dstoffset
+= words
* UNITS_PER_WORD
;
15461 remaining
-= words
* UNITS_PER_WORD
;
15463 gcc_assert (remaining
< 4);
15465 /* Copy a halfword if necessary. */
15467 if (remaining
>= 2)
15469 halfword_tmp
= gen_reg_rtx (SImode
);
15471 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15472 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
15473 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
15475 /* Either write out immediately, or delay until we've loaded the last
15476 byte, depending on interleave factor. */
15477 if (interleave_factor
== 1)
15479 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15480 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15481 emit_insn (gen_unaligned_storehi (mem
,
15482 gen_lowpart (HImode
, halfword_tmp
)));
15483 halfword_tmp
= NULL
;
15491 gcc_assert (remaining
< 2);
15493 /* Copy last byte. */
15495 if ((remaining
& 1) != 0)
15497 byte_tmp
= gen_reg_rtx (SImode
);
15499 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15500 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
15501 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
15503 if (interleave_factor
== 1)
15505 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15506 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15507 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15516 /* Store last halfword if we haven't done so already. */
15520 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15521 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15522 emit_insn (gen_unaligned_storehi (mem
,
15523 gen_lowpart (HImode
, halfword_tmp
)));
15527 /* Likewise for last byte. */
15531 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15532 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15533 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15537 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
15540 /* From mips_adjust_block_mem:
15542 Helper function for doing a loop-based block operation on memory
15543 reference MEM. Each iteration of the loop will operate on LENGTH
15546 Create a new base register for use within the loop and point it to
15547 the start of MEM. Create a new memory reference that uses this
15548 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15551 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
15554 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
15556 /* Although the new mem does not refer to a known location,
15557 it does keep up to LENGTH bytes of alignment. */
15558 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
15559 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
15562 /* From mips_block_move_loop:
15564 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15565 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15566 the memory regions do not overlap. */
15569 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
15570 unsigned int interleave_factor
,
15571 HOST_WIDE_INT bytes_per_iter
)
15573 rtx src_reg
, dest_reg
, final_src
, test
;
15574 HOST_WIDE_INT leftover
;
15576 leftover
= length
% bytes_per_iter
;
15577 length
-= leftover
;
15579 /* Create registers and memory references for use within the loop. */
15580 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
15581 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
15583 /* Calculate the value that SRC_REG should have after the last iteration of
15585 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
15586 0, 0, OPTAB_WIDEN
);
15588 /* Emit the start of the loop. */
15589 rtx_code_label
*label
= gen_label_rtx ();
15590 emit_label (label
);
15592 /* Emit the loop body. */
15593 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
15594 interleave_factor
);
15596 /* Move on to the next block. */
15597 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
15598 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
15600 /* Emit the loop condition. */
15601 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
15602 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
15604 /* Mop up any left-over bytes. */
15606 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
15609 /* Emit a block move when either the source or destination is unaligned (not
15610 aligned to a four-byte boundary). This may need further tuning depending on
15611 core type, optimize_size setting, etc. */
15614 arm_cpymemqi_unaligned (rtx
*operands
)
15616 HOST_WIDE_INT length
= INTVAL (operands
[2]);
15620 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
15621 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
15622 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15623 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15624 or dst_aligned though: allow more interleaving in those cases since the
15625 resulting code can be smaller. */
15626 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
15627 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
15630 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
15631 interleave_factor
, bytes_per_iter
);
15633 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
15634 interleave_factor
);
15638 /* Note that the loop created by arm_block_move_unaligned_loop may be
15639 subject to loop unrolling, which makes tuning this condition a little
15642 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
15644 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
15651 arm_gen_cpymemqi (rtx
*operands
)
15653 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
15654 HOST_WIDE_INT srcoffset
, dstoffset
;
15655 rtx src
, dst
, srcbase
, dstbase
;
15656 rtx part_bytes_reg
= NULL
;
15659 if (!CONST_INT_P (operands
[2])
15660 || !CONST_INT_P (operands
[3])
15661 || INTVAL (operands
[2]) > 64)
15664 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
15665 return arm_cpymemqi_unaligned (operands
);
15667 if (INTVAL (operands
[3]) & 3)
15670 dstbase
= operands
[0];
15671 srcbase
= operands
[1];
15673 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
15674 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
15676 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
15677 out_words_to_go
= INTVAL (operands
[2]) / 4;
15678 last_bytes
= INTVAL (operands
[2]) & 3;
15679 dstoffset
= srcoffset
= 0;
15681 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
15682 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
15684 while (in_words_to_go
>= 2)
15686 if (in_words_to_go
> 4)
15687 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
15688 TRUE
, srcbase
, &srcoffset
));
15690 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
15691 src
, FALSE
, srcbase
,
15694 if (out_words_to_go
)
15696 if (out_words_to_go
> 4)
15697 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
15698 TRUE
, dstbase
, &dstoffset
));
15699 else if (out_words_to_go
!= 1)
15700 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
15701 out_words_to_go
, dst
,
15704 dstbase
, &dstoffset
));
15707 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15708 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
15709 if (last_bytes
!= 0)
15711 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
15717 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
15718 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
15721 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15722 if (out_words_to_go
)
15726 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15727 sreg
= copy_to_reg (mem
);
15729 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15730 emit_move_insn (mem
, sreg
);
15733 gcc_assert (!in_words_to_go
); /* Sanity check */
15736 if (in_words_to_go
)
15738 gcc_assert (in_words_to_go
> 0);
15740 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15741 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
15744 gcc_assert (!last_bytes
|| part_bytes_reg
);
15746 if (BYTES_BIG_ENDIAN
&& last_bytes
)
15748 rtx tmp
= gen_reg_rtx (SImode
);
15750 /* The bytes we want are in the top end of the word. */
15751 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
15752 GEN_INT (8 * (4 - last_bytes
))));
15753 part_bytes_reg
= tmp
;
15757 mem
= adjust_automodify_address (dstbase
, QImode
,
15758 plus_constant (Pmode
, dst
,
15760 dstoffset
+ last_bytes
- 1);
15761 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15765 tmp
= gen_reg_rtx (SImode
);
15766 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
15767 part_bytes_reg
= tmp
;
15774 if (last_bytes
> 1)
15776 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
15777 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
15781 rtx tmp
= gen_reg_rtx (SImode
);
15782 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
15783 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
15784 part_bytes_reg
= tmp
;
15791 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
15792 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15799 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15802 next_consecutive_mem (rtx mem
)
15804 machine_mode mode
= GET_MODE (mem
);
15805 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
15806 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
15808 return adjust_automodify_address (mem
, mode
, addr
, offset
);
15811 /* Copy using LDRD/STRD instructions whenever possible.
15812 Returns true upon success. */
15814 gen_cpymem_ldrd_strd (rtx
*operands
)
15816 unsigned HOST_WIDE_INT len
;
15817 HOST_WIDE_INT align
;
15818 rtx src
, dst
, base
;
15820 bool src_aligned
, dst_aligned
;
15821 bool src_volatile
, dst_volatile
;
15823 gcc_assert (CONST_INT_P (operands
[2]));
15824 gcc_assert (CONST_INT_P (operands
[3]));
15826 len
= UINTVAL (operands
[2]);
15830 /* Maximum alignment we can assume for both src and dst buffers. */
15831 align
= INTVAL (operands
[3]);
15833 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
15836 /* Place src and dst addresses in registers
15837 and update the corresponding mem rtx. */
15839 dst_volatile
= MEM_VOLATILE_P (dst
);
15840 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
15841 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
15842 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
15845 src_volatile
= MEM_VOLATILE_P (src
);
15846 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
15847 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
15848 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
15850 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
15853 if (src_volatile
|| dst_volatile
)
15856 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15857 if (!(dst_aligned
|| src_aligned
))
15858 return arm_gen_cpymemqi (operands
);
15860 /* If the either src or dst is unaligned we'll be accessing it as pairs
15861 of unaligned SImode accesses. Otherwise we can generate DImode
15862 ldrd/strd instructions. */
15863 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
15864 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
15869 reg0
= gen_reg_rtx (DImode
);
15870 rtx first_reg
= NULL_RTX
;
15871 rtx second_reg
= NULL_RTX
;
15873 if (!src_aligned
|| !dst_aligned
)
15875 if (BYTES_BIG_ENDIAN
)
15877 second_reg
= gen_lowpart (SImode
, reg0
);
15878 first_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15882 first_reg
= gen_lowpart (SImode
, reg0
);
15883 second_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15886 if (MEM_ALIGN (src
) >= 2 * BITS_PER_WORD
)
15887 emit_move_insn (reg0
, src
);
15888 else if (src_aligned
)
15889 emit_insn (gen_unaligned_loaddi (reg0
, src
));
15892 emit_insn (gen_unaligned_loadsi (first_reg
, src
));
15893 src
= next_consecutive_mem (src
);
15894 emit_insn (gen_unaligned_loadsi (second_reg
, src
));
15897 if (MEM_ALIGN (dst
) >= 2 * BITS_PER_WORD
)
15898 emit_move_insn (dst
, reg0
);
15899 else if (dst_aligned
)
15900 emit_insn (gen_unaligned_storedi (dst
, reg0
));
15903 emit_insn (gen_unaligned_storesi (dst
, first_reg
));
15904 dst
= next_consecutive_mem (dst
);
15905 emit_insn (gen_unaligned_storesi (dst
, second_reg
));
15908 src
= next_consecutive_mem (src
);
15909 dst
= next_consecutive_mem (dst
);
15912 gcc_assert (len
< 8);
15915 /* More than a word but less than a double-word to copy. Copy a word. */
15916 reg0
= gen_reg_rtx (SImode
);
15917 src
= adjust_address (src
, SImode
, 0);
15918 dst
= adjust_address (dst
, SImode
, 0);
15920 emit_move_insn (reg0
, src
);
15922 emit_insn (gen_unaligned_loadsi (reg0
, src
));
15925 emit_move_insn (dst
, reg0
);
15927 emit_insn (gen_unaligned_storesi (dst
, reg0
));
15929 src
= next_consecutive_mem (src
);
15930 dst
= next_consecutive_mem (dst
);
15937 /* Copy the remaining bytes. */
15940 dst
= adjust_address (dst
, HImode
, 0);
15941 src
= adjust_address (src
, HImode
, 0);
15942 reg0
= gen_reg_rtx (SImode
);
15944 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
15946 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
15949 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
15951 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
15953 src
= next_consecutive_mem (src
);
15954 dst
= next_consecutive_mem (dst
);
15959 dst
= adjust_address (dst
, QImode
, 0);
15960 src
= adjust_address (src
, QImode
, 0);
15961 reg0
= gen_reg_rtx (QImode
);
15962 emit_move_insn (reg0
, src
);
15963 emit_move_insn (dst
, reg0
);
15967 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15968 into its component 32-bit subregs. OP2 may be an immediate
15969 constant and we want to simplify it in that case. */
15971 arm_decompose_di_binop (rtx op1
, rtx op2
, rtx
*lo_op1
, rtx
*hi_op1
,
15972 rtx
*lo_op2
, rtx
*hi_op2
)
15974 *lo_op1
= gen_lowpart (SImode
, op1
);
15975 *hi_op1
= gen_highpart (SImode
, op1
);
15976 *lo_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15977 subreg_lowpart_offset (SImode
, DImode
));
15978 *hi_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15979 subreg_highpart_offset (SImode
, DImode
));
15982 /* Select a dominance comparison mode if possible for a test of the general
15983 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15984 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15985 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15986 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15987 In all cases OP will be either EQ or NE, but we don't need to know which
15988 here. If we are unable to support a dominance comparison we return
15989 CC mode. This will then fail to match for the RTL expressions that
15990 generate this call. */
15992 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
15994 enum rtx_code cond1
, cond2
;
15997 /* Currently we will probably get the wrong result if the individual
15998 comparisons are not simple. This also ensures that it is safe to
15999 reverse a comparison if necessary. */
16000 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
16002 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
16006 /* The if_then_else variant of this tests the second condition if the
16007 first passes, but is true if the first fails. Reverse the first
16008 condition to get a true "inclusive-or" expression. */
16009 if (cond_or
== DOM_CC_NX_OR_Y
)
16010 cond1
= reverse_condition (cond1
);
16012 /* If the comparisons are not equal, and one doesn't dominate the other,
16013 then we can't do this. */
16015 && !comparison_dominates_p (cond1
, cond2
)
16016 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
16020 std::swap (cond1
, cond2
);
16025 if (cond_or
== DOM_CC_X_AND_Y
)
16030 case EQ
: return CC_DEQmode
;
16031 case LE
: return CC_DLEmode
;
16032 case LEU
: return CC_DLEUmode
;
16033 case GE
: return CC_DGEmode
;
16034 case GEU
: return CC_DGEUmode
;
16035 default: gcc_unreachable ();
16039 if (cond_or
== DOM_CC_X_AND_Y
)
16051 gcc_unreachable ();
16055 if (cond_or
== DOM_CC_X_AND_Y
)
16067 gcc_unreachable ();
16071 if (cond_or
== DOM_CC_X_AND_Y
)
16072 return CC_DLTUmode
;
16077 return CC_DLTUmode
;
16079 return CC_DLEUmode
;
16083 gcc_unreachable ();
16087 if (cond_or
== DOM_CC_X_AND_Y
)
16088 return CC_DGTUmode
;
16093 return CC_DGTUmode
;
16095 return CC_DGEUmode
;
16099 gcc_unreachable ();
16102 /* The remaining cases only occur when both comparisons are the
16105 gcc_assert (cond1
== cond2
);
16109 gcc_assert (cond1
== cond2
);
16113 gcc_assert (cond1
== cond2
);
16117 gcc_assert (cond1
== cond2
);
16118 return CC_DLEUmode
;
16121 gcc_assert (cond1
== cond2
);
16122 return CC_DGEUmode
;
16125 gcc_unreachable ();
16130 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
16132 /* All floating point compares return CCFP if it is an equality
16133 comparison, and CCFPE otherwise. */
16134 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
16157 gcc_unreachable ();
16161 /* A compare with a shifted operand. Because of canonicalization, the
16162 comparison will have to be swapped when we emit the assembler. */
16163 if (GET_MODE (y
) == SImode
16164 && (REG_P (y
) || (SUBREG_P (y
)))
16165 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16166 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
16167 || GET_CODE (x
) == ROTATERT
))
16170 /* A widened compare of the sum of a value plus a carry against a
16171 constant. This is a representation of RSC. We want to swap the
16172 result of the comparison at output. Not valid if the Z bit is
16174 if (GET_MODE (x
) == DImode
16175 && GET_CODE (x
) == PLUS
16176 && arm_borrow_operation (XEXP (x
, 1), DImode
)
16178 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16179 && (op
== LE
|| op
== GT
))
16180 || (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
16181 && (op
== LEU
|| op
== GTU
))))
16184 /* If X is a constant we want to use CC_RSBmode. This is
16185 non-canonical, but arm_gen_compare_reg uses this to generate the
16186 correct canonical form. */
16187 if (GET_MODE (y
) == SImode
16188 && (REG_P (y
) || SUBREG_P (y
))
16189 && CONST_INT_P (x
))
16192 /* This operation is performed swapped, but since we only rely on the Z
16193 flag we don't need an additional mode. */
16194 if (GET_MODE (y
) == SImode
16195 && (REG_P (y
) || (SUBREG_P (y
)))
16196 && GET_CODE (x
) == NEG
16197 && (op
== EQ
|| op
== NE
))
16200 /* This is a special case that is used by combine to allow a
16201 comparison of a shifted byte load to be split into a zero-extend
16202 followed by a comparison of the shifted integer (only valid for
16203 equalities and unsigned inequalities). */
16204 if (GET_MODE (x
) == SImode
16205 && GET_CODE (x
) == ASHIFT
16206 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
16207 && GET_CODE (XEXP (x
, 0)) == SUBREG
16208 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
16209 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
16210 && (op
== EQ
|| op
== NE
16211 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
16212 && CONST_INT_P (y
))
16215 /* A construct for a conditional compare, if the false arm contains
16216 0, then both conditions must be true, otherwise either condition
16217 must be true. Not all conditions are possible, so CCmode is
16218 returned if it can't be done. */
16219 if (GET_CODE (x
) == IF_THEN_ELSE
16220 && (XEXP (x
, 2) == const0_rtx
16221 || XEXP (x
, 2) == const1_rtx
)
16222 && COMPARISON_P (XEXP (x
, 0))
16223 && COMPARISON_P (XEXP (x
, 1)))
16224 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16225 INTVAL (XEXP (x
, 2)));
16227 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16228 if (GET_CODE (x
) == AND
16229 && (op
== EQ
|| op
== NE
)
16230 && COMPARISON_P (XEXP (x
, 0))
16231 && COMPARISON_P (XEXP (x
, 1)))
16232 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16235 if (GET_CODE (x
) == IOR
16236 && (op
== EQ
|| op
== NE
)
16237 && COMPARISON_P (XEXP (x
, 0))
16238 && COMPARISON_P (XEXP (x
, 1)))
16239 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16242 /* An operation (on Thumb) where we want to test for a single bit.
16243 This is done by shifting that bit up into the top bit of a
16244 scratch register; we can then branch on the sign bit. */
16246 && GET_MODE (x
) == SImode
16247 && (op
== EQ
|| op
== NE
)
16248 && GET_CODE (x
) == ZERO_EXTRACT
16249 && XEXP (x
, 1) == const1_rtx
)
16252 /* An operation that sets the condition codes as a side-effect, the
16253 V flag is not set correctly, so we can only use comparisons where
16254 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16256 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16257 if (GET_MODE (x
) == SImode
16259 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
16260 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
16261 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
16262 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
16263 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
16264 || GET_CODE (x
) == LSHIFTRT
16265 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16266 || GET_CODE (x
) == ROTATERT
16267 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
16270 /* A comparison of ~reg with a const is really a special
16271 canoncialization of compare (~const, reg), which is a reverse
16272 subtract operation. We may not get here if CONST is 0, but that
16273 doesn't matter because ~0 isn't a valid immediate for RSB. */
16274 if (GET_MODE (x
) == SImode
16275 && GET_CODE (x
) == NOT
16276 && CONST_INT_P (y
))
16279 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
16282 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
16283 && GET_CODE (x
) == PLUS
16284 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
16287 if (GET_MODE (x
) == DImode
16288 && GET_CODE (x
) == PLUS
16289 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
16291 && UINTVAL (y
) == 0x800000000
16292 && (op
== GEU
|| op
== LTU
))
16295 if (GET_MODE (x
) == DImode
16296 && (op
== GE
|| op
== LT
)
16297 && GET_CODE (x
) == SIGN_EXTEND
16298 && ((GET_CODE (y
) == PLUS
16299 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16300 || arm_borrow_operation (y
, DImode
)))
16303 if (GET_MODE (x
) == DImode
16304 && (op
== GEU
|| op
== LTU
)
16305 && GET_CODE (x
) == ZERO_EXTEND
16306 && ((GET_CODE (y
) == PLUS
16307 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16308 || arm_borrow_operation (y
, DImode
)))
16311 if (GET_MODE (x
) == DImode
16312 && (op
== EQ
|| op
== NE
)
16313 && (GET_CODE (x
) == PLUS
16314 || GET_CODE (x
) == MINUS
)
16315 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16316 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
16317 && GET_CODE (y
) == SIGN_EXTEND
16318 && GET_CODE (XEXP (y
, 0)) == GET_CODE (x
))
16321 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
16322 return GET_MODE (x
);
16327 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16328 the sequence of instructions needed to generate a suitable condition
16329 code register. Return the CC register result. */
16331 arm_gen_dicompare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
16336 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16337 gcc_assert (TARGET_32BIT
);
16338 gcc_assert (!CONST_INT_P (x
));
16340 rtx x_lo
= simplify_gen_subreg (SImode
, x
, DImode
,
16341 subreg_lowpart_offset (SImode
, DImode
));
16342 rtx x_hi
= simplify_gen_subreg (SImode
, x
, DImode
,
16343 subreg_highpart_offset (SImode
, DImode
));
16344 rtx y_lo
= simplify_gen_subreg (SImode
, y
, DImode
,
16345 subreg_lowpart_offset (SImode
, DImode
));
16346 rtx y_hi
= simplify_gen_subreg (SImode
, y
, DImode
,
16347 subreg_highpart_offset (SImode
, DImode
));
16353 if (y_lo
== const0_rtx
|| y_hi
== const0_rtx
)
16355 if (y_lo
!= const0_rtx
)
16357 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16359 gcc_assert (y_hi
== const0_rtx
);
16360 y_lo
= gen_int_mode (-INTVAL (y_lo
), SImode
);
16361 if (!arm_add_operand (y_lo
, SImode
))
16362 y_lo
= force_reg (SImode
, y_lo
);
16363 emit_insn (gen_addsi3 (scratch2
, x_lo
, y_lo
));
16366 else if (y_hi
!= const0_rtx
)
16368 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16370 y_hi
= gen_int_mode (-INTVAL (y_hi
), SImode
);
16371 if (!arm_add_operand (y_hi
, SImode
))
16372 y_hi
= force_reg (SImode
, y_hi
);
16373 emit_insn (gen_addsi3 (scratch2
, x_hi
, y_hi
));
16379 gcc_assert (!reload_completed
);
16380 scratch
= gen_rtx_SCRATCH (SImode
);
16383 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
16384 cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
16387 = gen_rtx_SET (cc_reg
,
16388 gen_rtx_COMPARE (CC_NZmode
,
16389 gen_rtx_IOR (SImode
, x_lo
, x_hi
),
16391 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
,
16396 if (!arm_add_operand (y_lo
, SImode
))
16397 y_lo
= force_reg (SImode
, y_lo
);
16399 if (!arm_add_operand (y_hi
, SImode
))
16400 y_hi
= force_reg (SImode
, y_hi
);
16402 rtx cmp1
= gen_rtx_NE (SImode
, x_lo
, y_lo
);
16403 rtx cmp2
= gen_rtx_NE (SImode
, x_hi
, y_hi
);
16404 rtx conjunction
= gen_rtx_IOR (SImode
, cmp1
, cmp2
);
16405 mode
= SELECT_CC_MODE (code
, conjunction
, const0_rtx
);
16406 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
16408 emit_insn (gen_rtx_SET (cc_reg
,
16409 gen_rtx_COMPARE (mode
, conjunction
,
16417 if (y_lo
== const0_rtx
)
16419 /* If the low word of y is 0, then this is simply a normal
16420 compare of the upper words. */
16421 if (!arm_add_operand (y_hi
, SImode
))
16422 y_hi
= force_reg (SImode
, y_hi
);
16424 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
16427 if (!arm_add_operand (y_lo
, SImode
))
16428 y_lo
= force_reg (SImode
, y_lo
);
16431 = gen_rtx_LTU (DImode
,
16432 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
16436 scratch
= gen_rtx_SCRATCH (SImode
);
16438 if (!arm_not_operand (y_hi
, SImode
))
16439 y_hi
= force_reg (SImode
, y_hi
);
16442 if (y_hi
== const0_rtx
)
16443 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch
, x_hi
,
16445 else if (CONST_INT_P (y_hi
))
16446 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch
, x_hi
,
16449 insn
= emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch
, x_hi
, y_hi
,
16451 return SET_DEST (single_set (insn
));
16457 /* During expansion, we only expect to get here if y is a
16458 constant that we want to handle, otherwise we should have
16459 swapped the operands already. */
16460 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
16462 if (!const_ok_for_arm (INTVAL (y_lo
)))
16463 y_lo
= force_reg (SImode
, y_lo
);
16465 /* Perform a reverse subtract and compare. */
16467 = gen_rtx_LTU (DImode
,
16468 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
16470 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_NVout_scratch (scratch
, y_hi
,
16472 return SET_DEST (single_set (insn
));
16478 if (y_lo
== const0_rtx
)
16480 /* If the low word of y is 0, then this is simply a normal
16481 compare of the upper words. */
16482 if (!arm_add_operand (y_hi
, SImode
))
16483 y_hi
= force_reg (SImode
, y_hi
);
16485 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
16488 if (!arm_add_operand (y_lo
, SImode
))
16489 y_lo
= force_reg (SImode
, y_lo
);
16492 = gen_rtx_LTU (DImode
,
16493 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
16497 scratch
= gen_rtx_SCRATCH (SImode
);
16498 if (!arm_not_operand (y_hi
, SImode
))
16499 y_hi
= force_reg (SImode
, y_hi
);
16502 if (y_hi
== const0_rtx
)
16503 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch
, x_hi
,
16505 else if (CONST_INT_P (y_hi
))
16507 /* Constant is viewed as unsigned when zero-extended. */
16508 y_hi
= GEN_INT (UINTVAL (y_hi
) & 0xffffffffULL
);
16509 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch
, x_hi
,
16513 insn
= emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch
, x_hi
, y_hi
,
16515 return SET_DEST (single_set (insn
));
16521 /* During expansion, we only expect to get here if y is a
16522 constant that we want to handle, otherwise we should have
16523 swapped the operands already. */
16524 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
16526 if (!const_ok_for_arm (INTVAL (y_lo
)))
16527 y_lo
= force_reg (SImode
, y_lo
);
16529 /* Perform a reverse subtract and compare. */
16531 = gen_rtx_LTU (DImode
,
16532 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
16534 y_hi
= GEN_INT (0xffffffff & UINTVAL (y_hi
));
16535 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_Bout_scratch (scratch
, y_hi
,
16537 return SET_DEST (single_set (insn
));
16541 gcc_unreachable ();
16545 /* X and Y are two things to compare using CODE. Emit the compare insn and
16546 return the rtx for register 0 in the proper mode. */
16548 arm_gen_compare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
16550 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
16551 return arm_gen_dicompare_reg (code
, x
, y
, scratch
);
16553 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
16554 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
16555 if (mode
== CC_RSBmode
)
16558 scratch
= gen_rtx_SCRATCH (SImode
);
16559 emit_insn (gen_rsb_imm_compare_scratch (scratch
,
16560 GEN_INT (~UINTVAL (x
)), y
));
16563 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
16568 /* Generate a sequence of insns that will generate the correct return
16569 address mask depending on the physical architecture that the program
16572 arm_gen_return_addr_mask (void)
16574 rtx reg
= gen_reg_rtx (Pmode
);
16576 emit_insn (gen_return_addr_mask (reg
));
16581 arm_reload_in_hi (rtx
*operands
)
16583 rtx ref
= operands
[1];
16585 HOST_WIDE_INT offset
= 0;
16587 if (SUBREG_P (ref
))
16589 offset
= SUBREG_BYTE (ref
);
16590 ref
= SUBREG_REG (ref
);
16595 /* We have a pseudo which has been spilt onto the stack; there
16596 are two cases here: the first where there is a simple
16597 stack-slot replacement and a second where the stack-slot is
16598 out of range, or is used as a subreg. */
16599 if (reg_equiv_mem (REGNO (ref
)))
16601 ref
= reg_equiv_mem (REGNO (ref
));
16602 base
= find_replacement (&XEXP (ref
, 0));
16605 /* The slot is out of range, or was dressed up in a SUBREG. */
16606 base
= reg_equiv_address (REGNO (ref
));
16608 /* PR 62554: If there is no equivalent memory location then just move
16609 the value as an SImode register move. This happens when the target
16610 architecture variant does not have an HImode register move. */
16613 gcc_assert (REG_P (operands
[0]));
16614 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16615 gen_rtx_SUBREG (SImode
, ref
, 0)));
16620 base
= find_replacement (&XEXP (ref
, 0));
16622 /* Handle the case where the address is too complex to be offset by 1. */
16623 if (GET_CODE (base
) == MINUS
16624 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
16626 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16628 emit_set_insn (base_plus
, base
);
16631 else if (GET_CODE (base
) == PLUS
)
16633 /* The addend must be CONST_INT, or we would have dealt with it above. */
16634 HOST_WIDE_INT hi
, lo
;
16636 offset
+= INTVAL (XEXP (base
, 1));
16637 base
= XEXP (base
, 0);
16639 /* Rework the address into a legal sequence of insns. */
16640 /* Valid range for lo is -4095 -> 4095 */
16643 : -((-offset
) & 0xfff));
16645 /* Corner case, if lo is the max offset then we would be out of range
16646 once we have added the additional 1 below, so bump the msb into the
16647 pre-loading insn(s). */
16651 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
16652 ^ (HOST_WIDE_INT
) 0x80000000)
16653 - (HOST_WIDE_INT
) 0x80000000);
16655 gcc_assert (hi
+ lo
== offset
);
16659 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16661 /* Get the base address; addsi3 knows how to handle constants
16662 that require more than one insn. */
16663 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16669 /* Operands[2] may overlap operands[0] (though it won't overlap
16670 operands[1]), that's why we asked for a DImode reg -- so we can
16671 use the bit that does not overlap. */
16672 if (REGNO (operands
[2]) == REGNO (operands
[0]))
16673 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16675 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16677 emit_insn (gen_zero_extendqisi2 (scratch
,
16678 gen_rtx_MEM (QImode
,
16679 plus_constant (Pmode
, base
,
16681 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16682 gen_rtx_MEM (QImode
,
16683 plus_constant (Pmode
, base
,
16685 if (!BYTES_BIG_ENDIAN
)
16686 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16687 gen_rtx_IOR (SImode
,
16690 gen_rtx_SUBREG (SImode
, operands
[0], 0),
16694 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16695 gen_rtx_IOR (SImode
,
16696 gen_rtx_ASHIFT (SImode
, scratch
,
16698 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
16701 /* Handle storing a half-word to memory during reload by synthesizing as two
16702 byte stores. Take care not to clobber the input values until after we
16703 have moved them somewhere safe. This code assumes that if the DImode
16704 scratch in operands[2] overlaps either the input value or output address
16705 in some way, then that value must die in this insn (we absolutely need
16706 two scratch registers for some corner cases). */
16708 arm_reload_out_hi (rtx
*operands
)
16710 rtx ref
= operands
[0];
16711 rtx outval
= operands
[1];
16713 HOST_WIDE_INT offset
= 0;
16715 if (SUBREG_P (ref
))
16717 offset
= SUBREG_BYTE (ref
);
16718 ref
= SUBREG_REG (ref
);
16723 /* We have a pseudo which has been spilt onto the stack; there
16724 are two cases here: the first where there is a simple
16725 stack-slot replacement and a second where the stack-slot is
16726 out of range, or is used as a subreg. */
16727 if (reg_equiv_mem (REGNO (ref
)))
16729 ref
= reg_equiv_mem (REGNO (ref
));
16730 base
= find_replacement (&XEXP (ref
, 0));
16733 /* The slot is out of range, or was dressed up in a SUBREG. */
16734 base
= reg_equiv_address (REGNO (ref
));
16736 /* PR 62254: If there is no equivalent memory location then just move
16737 the value as an SImode register move. This happens when the target
16738 architecture variant does not have an HImode register move. */
16741 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
16743 if (REG_P (outval
))
16745 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16746 gen_rtx_SUBREG (SImode
, outval
, 0)));
16748 else /* SUBREG_P (outval) */
16750 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
16751 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16752 SUBREG_REG (outval
)));
16754 /* FIXME: Handle other cases ? */
16755 gcc_unreachable ();
16761 base
= find_replacement (&XEXP (ref
, 0));
16763 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16765 /* Handle the case where the address is too complex to be offset by 1. */
16766 if (GET_CODE (base
) == MINUS
16767 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
16769 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16771 /* Be careful not to destroy OUTVAL. */
16772 if (reg_overlap_mentioned_p (base_plus
, outval
))
16774 /* Updating base_plus might destroy outval, see if we can
16775 swap the scratch and base_plus. */
16776 if (!reg_overlap_mentioned_p (scratch
, outval
))
16777 std::swap (scratch
, base_plus
);
16780 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16782 /* Be conservative and copy OUTVAL into the scratch now,
16783 this should only be necessary if outval is a subreg
16784 of something larger than a word. */
16785 /* XXX Might this clobber base? I can't see how it can,
16786 since scratch is known to overlap with OUTVAL, and
16787 must be wider than a word. */
16788 emit_insn (gen_movhi (scratch_hi
, outval
));
16789 outval
= scratch_hi
;
16793 emit_set_insn (base_plus
, base
);
16796 else if (GET_CODE (base
) == PLUS
)
16798 /* The addend must be CONST_INT, or we would have dealt with it above. */
16799 HOST_WIDE_INT hi
, lo
;
16801 offset
+= INTVAL (XEXP (base
, 1));
16802 base
= XEXP (base
, 0);
16804 /* Rework the address into a legal sequence of insns. */
16805 /* Valid range for lo is -4095 -> 4095 */
16808 : -((-offset
) & 0xfff));
16810 /* Corner case, if lo is the max offset then we would be out of range
16811 once we have added the additional 1 below, so bump the msb into the
16812 pre-loading insn(s). */
16816 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
16817 ^ (HOST_WIDE_INT
) 0x80000000)
16818 - (HOST_WIDE_INT
) 0x80000000);
16820 gcc_assert (hi
+ lo
== offset
);
16824 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16826 /* Be careful not to destroy OUTVAL. */
16827 if (reg_overlap_mentioned_p (base_plus
, outval
))
16829 /* Updating base_plus might destroy outval, see if we
16830 can swap the scratch and base_plus. */
16831 if (!reg_overlap_mentioned_p (scratch
, outval
))
16832 std::swap (scratch
, base_plus
);
16835 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16837 /* Be conservative and copy outval into scratch now,
16838 this should only be necessary if outval is a
16839 subreg of something larger than a word. */
16840 /* XXX Might this clobber base? I can't see how it
16841 can, since scratch is known to overlap with
16843 emit_insn (gen_movhi (scratch_hi
, outval
));
16844 outval
= scratch_hi
;
16848 /* Get the base address; addsi3 knows how to handle constants
16849 that require more than one insn. */
16850 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16856 if (BYTES_BIG_ENDIAN
)
16858 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
16859 plus_constant (Pmode
, base
,
16861 gen_lowpart (QImode
, outval
)));
16862 emit_insn (gen_lshrsi3 (scratch
,
16863 gen_rtx_SUBREG (SImode
, outval
, 0),
16865 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
16867 gen_lowpart (QImode
, scratch
)));
16871 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
16873 gen_lowpart (QImode
, outval
)));
16874 emit_insn (gen_lshrsi3 (scratch
,
16875 gen_rtx_SUBREG (SImode
, outval
, 0),
16877 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
16878 plus_constant (Pmode
, base
,
16880 gen_lowpart (QImode
, scratch
)));
16884 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16885 (padded to the size of a word) should be passed in a register. */
16888 arm_must_pass_in_stack (const function_arg_info
&arg
)
16890 if (TARGET_AAPCS_BASED
)
16891 return must_pass_in_stack_var_size (arg
);
16893 return must_pass_in_stack_var_size_or_pad (arg
);
16897 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16898 byte of a stack argument has useful data. For legacy APCS ABIs we use
16899 the default. For AAPCS based ABIs small aggregate types are placed
16900 in the lowest memory address. */
16902 static pad_direction
16903 arm_function_arg_padding (machine_mode mode
, const_tree type
)
16905 if (!TARGET_AAPCS_BASED
)
16906 return default_function_arg_padding (mode
, type
);
16908 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
16909 return PAD_DOWNWARD
;
16915 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16916 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16917 register has useful data, and return the opposite if the most
16918 significant byte does. */
16921 arm_pad_reg_upward (machine_mode mode
,
16922 tree type
, int first ATTRIBUTE_UNUSED
)
16924 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
16926 /* For AAPCS, small aggregates, small fixed-point types,
16927 and small complex types are always padded upwards. */
16930 if ((AGGREGATE_TYPE_P (type
)
16931 || TREE_CODE (type
) == COMPLEX_TYPE
16932 || FIXED_POINT_TYPE_P (type
))
16933 && int_size_in_bytes (type
) <= 4)
16938 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
16939 && GET_MODE_SIZE (mode
) <= 4)
16944 /* Otherwise, use default padding. */
16945 return !BYTES_BIG_ENDIAN
;
16948 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16949 assuming that the address in the base register is word aligned. */
16951 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
16953 HOST_WIDE_INT max_offset
;
16955 /* Offset must be a multiple of 4 in Thumb mode. */
16956 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
16961 else if (TARGET_ARM
)
16966 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
16969 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16970 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16971 Assumes that the address in the base register RN is word aligned. Pattern
16972 guarantees that both memory accesses use the same base register,
16973 the offsets are constants within the range, and the gap between the offsets is 4.
16974 If preload complete then check that registers are legal. WBACK indicates whether
16975 address is updated. LOAD indicates whether memory access is load or store. */
16977 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
16978 bool wback
, bool load
)
16980 unsigned int t
, t2
, n
;
16982 if (!reload_completed
)
16985 if (!offset_ok_for_ldrd_strd (offset
))
16992 if ((TARGET_THUMB2
)
16993 && ((wback
&& (n
== t
|| n
== t2
))
16994 || (t
== SP_REGNUM
)
16995 || (t
== PC_REGNUM
)
16996 || (t2
== SP_REGNUM
)
16997 || (t2
== PC_REGNUM
)
16998 || (!load
&& (n
== PC_REGNUM
))
16999 || (load
&& (t
== t2
))
17000 /* Triggers Cortex-M3 LDRD errata. */
17001 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
17005 && ((wback
&& (n
== t
|| n
== t2
))
17006 || (t2
== PC_REGNUM
)
17007 || (t
% 2 != 0) /* First destination register is not even. */
17009 /* PC can be used as base register (for offset addressing only),
17010 but it is depricated. */
17011 || (n
== PC_REGNUM
)))
17017 /* Return true if a 64-bit access with alignment ALIGN and with a
17018 constant offset OFFSET from the base pointer is permitted on this
17021 align_ok_ldrd_strd (HOST_WIDE_INT align
, HOST_WIDE_INT offset
)
17023 return (unaligned_access
17024 ? (align
>= BITS_PER_WORD
&& (offset
& 3) == 0)
17025 : (align
>= 2 * BITS_PER_WORD
&& (offset
& 7) == 0));
17028 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17029 operand MEM's address contains an immediate offset from the base
17030 register and has no side effects, in which case it sets BASE,
17031 OFFSET and ALIGN accordingly. */
17033 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
, HOST_WIDE_INT
*align
)
17037 gcc_assert (base
!= NULL
&& offset
!= NULL
);
17039 /* TODO: Handle more general memory operand patterns, such as
17040 PRE_DEC and PRE_INC. */
17042 if (side_effects_p (mem
))
17045 /* Can't deal with subregs. */
17046 if (SUBREG_P (mem
))
17049 gcc_assert (MEM_P (mem
));
17051 *offset
= const0_rtx
;
17052 *align
= MEM_ALIGN (mem
);
17054 addr
= XEXP (mem
, 0);
17056 /* If addr isn't valid for DImode, then we can't handle it. */
17057 if (!arm_legitimate_address_p (DImode
, addr
,
17058 reload_in_progress
|| reload_completed
))
17066 else if (GET_CODE (addr
) == PLUS
)
17068 *base
= XEXP (addr
, 0);
17069 *offset
= XEXP (addr
, 1);
17070 return (REG_P (*base
) && CONST_INT_P (*offset
));
17076 /* Called from a peephole2 to replace two word-size accesses with a
17077 single LDRD/STRD instruction. Returns true iff we can generate a
17078 new instruction sequence. That is, both accesses use the same base
17079 register and the gap between constant offsets is 4. This function
17080 may reorder its operands to match ldrd/strd RTL templates.
17081 OPERANDS are the operands found by the peephole matcher;
17082 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17083 corresponding memory operands. LOAD indicaates whether the access
17084 is load or store. CONST_STORE indicates a store of constant
17085 integer values held in OPERANDS[4,5] and assumes that the pattern
17086 is of length 4 insn, for the purpose of checking dead registers.
17087 COMMUTE indicates that register operands may be reordered. */
17089 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
17090 bool const_store
, bool commute
)
17093 HOST_WIDE_INT offsets
[2], offset
, align
[2];
17094 rtx base
= NULL_RTX
;
17095 rtx cur_base
, cur_offset
, tmp
;
17097 HARD_REG_SET regset
;
17099 gcc_assert (!const_store
|| !load
);
17100 /* Check that the memory references are immediate offsets from the
17101 same base register. Extract the base register, the destination
17102 registers, and the corresponding memory offsets. */
17103 for (i
= 0; i
< nops
; i
++)
17105 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
17111 else if (REGNO (base
) != REGNO (cur_base
))
17114 offsets
[i
] = INTVAL (cur_offset
);
17115 if (GET_CODE (operands
[i
]) == SUBREG
)
17117 tmp
= SUBREG_REG (operands
[i
]);
17118 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
17123 /* Make sure there is no dependency between the individual loads. */
17124 if (load
&& REGNO (operands
[0]) == REGNO (base
))
17125 return false; /* RAW */
17127 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
17128 return false; /* WAW */
17130 /* If the same input register is used in both stores
17131 when storing different constants, try to find a free register.
17132 For example, the code
17137 can be transformed into
17141 in Thumb mode assuming that r1 is free.
17142 For ARM mode do the same but only if the starting register
17143 can be made to be even. */
17145 && REGNO (operands
[0]) == REGNO (operands
[1])
17146 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
17150 CLEAR_HARD_REG_SET (regset
);
17151 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17152 if (tmp
== NULL_RTX
)
17155 /* Use the new register in the first load to ensure that
17156 if the original input register is not dead after peephole,
17157 then it will have the correct constant value. */
17160 else if (TARGET_ARM
)
17162 int regno
= REGNO (operands
[0]);
17163 if (!peep2_reg_dead_p (4, operands
[0]))
17165 /* When the input register is even and is not dead after the
17166 pattern, it has to hold the second constant but we cannot
17167 form a legal STRD in ARM mode with this register as the second
17169 if (regno
% 2 == 0)
17172 /* Is regno-1 free? */
17173 SET_HARD_REG_SET (regset
);
17174 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
17175 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17176 if (tmp
== NULL_RTX
)
17183 /* Find a DImode register. */
17184 CLEAR_HARD_REG_SET (regset
);
17185 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17186 if (tmp
!= NULL_RTX
)
17188 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17189 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17193 /* Can we use the input register to form a DI register? */
17194 SET_HARD_REG_SET (regset
);
17195 CLEAR_HARD_REG_BIT(regset
,
17196 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
17197 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17198 if (tmp
== NULL_RTX
)
17200 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
17204 gcc_assert (operands
[0] != NULL_RTX
);
17205 gcc_assert (operands
[1] != NULL_RTX
);
17206 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17207 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
17211 /* Make sure the instructions are ordered with lower memory access first. */
17212 if (offsets
[0] > offsets
[1])
17214 gap
= offsets
[0] - offsets
[1];
17215 offset
= offsets
[1];
17217 /* Swap the instructions such that lower memory is accessed first. */
17218 std::swap (operands
[0], operands
[1]);
17219 std::swap (operands
[2], operands
[3]);
17220 std::swap (align
[0], align
[1]);
17222 std::swap (operands
[4], operands
[5]);
17226 gap
= offsets
[1] - offsets
[0];
17227 offset
= offsets
[0];
17230 /* Make sure accesses are to consecutive memory locations. */
17231 if (gap
!= GET_MODE_SIZE (SImode
))
17234 if (!align_ok_ldrd_strd (align
[0], offset
))
17237 /* Make sure we generate legal instructions. */
17238 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17242 /* In Thumb state, where registers are almost unconstrained, there
17243 is little hope to fix it. */
17247 if (load
&& commute
)
17249 /* Try reordering registers. */
17250 std::swap (operands
[0], operands
[1]);
17251 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17258 /* If input registers are dead after this pattern, they can be
17259 reordered or replaced by other registers that are free in the
17260 current pattern. */
17261 if (!peep2_reg_dead_p (4, operands
[0])
17262 || !peep2_reg_dead_p (4, operands
[1]))
17265 /* Try to reorder the input registers. */
17266 /* For example, the code
17271 can be transformed into
17276 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
17279 std::swap (operands
[0], operands
[1]);
17283 /* Try to find a free DI register. */
17284 CLEAR_HARD_REG_SET (regset
);
17285 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
17286 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
17289 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17290 if (tmp
== NULL_RTX
)
17293 /* DREG must be an even-numbered register in DImode.
17294 Split it into SI registers. */
17295 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17296 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17297 gcc_assert (operands
[0] != NULL_RTX
);
17298 gcc_assert (operands
[1] != NULL_RTX
);
17299 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17300 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
17302 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
17312 /* Return true if parallel execution of the two word-size accesses provided
17313 could be satisfied with a single LDRD/STRD instruction. Two word-size
17314 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17315 register operands and OPERANDS[2,3] are the corresponding memory operands.
17318 valid_operands_ldrd_strd (rtx
*operands
, bool load
)
17321 HOST_WIDE_INT offsets
[2], offset
, align
[2];
17322 rtx base
= NULL_RTX
;
17323 rtx cur_base
, cur_offset
;
17326 /* Check that the memory references are immediate offsets from the
17327 same base register. Extract the base register, the destination
17328 registers, and the corresponding memory offsets. */
17329 for (i
= 0; i
< nops
; i
++)
17331 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
17337 else if (REGNO (base
) != REGNO (cur_base
))
17340 offsets
[i
] = INTVAL (cur_offset
);
17341 if (GET_CODE (operands
[i
]) == SUBREG
)
17345 if (offsets
[0] > offsets
[1])
17348 gap
= offsets
[1] - offsets
[0];
17349 offset
= offsets
[0];
17351 /* Make sure accesses are to consecutive memory locations. */
17352 if (gap
!= GET_MODE_SIZE (SImode
))
17355 if (!align_ok_ldrd_strd (align
[0], offset
))
17358 return operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17363 /* Print a symbolic form of X to the debug file, F. */
17365 arm_print_value (FILE *f
, rtx x
)
17367 switch (GET_CODE (x
))
17370 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
17376 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
17377 sizeof (fpstr
), 0, 1);
17387 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
17389 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
17390 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
17398 fprintf (f
, "\"%s\"", XSTR (x
, 0));
17402 fprintf (f
, "`%s'", XSTR (x
, 0));
17406 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
17410 arm_print_value (f
, XEXP (x
, 0));
17414 arm_print_value (f
, XEXP (x
, 0));
17416 arm_print_value (f
, XEXP (x
, 1));
17424 fprintf (f
, "????");
17429 /* Routines for manipulation of the constant pool. */
17431 /* Arm instructions cannot load a large constant directly into a
17432 register; they have to come from a pc relative load. The constant
17433 must therefore be placed in the addressable range of the pc
17434 relative load. Depending on the precise pc relative load
17435 instruction the range is somewhere between 256 bytes and 4k. This
17436 means that we often have to dump a constant inside a function, and
17437 generate code to branch around it.
17439 It is important to minimize this, since the branches will slow
17440 things down and make the code larger.
17442 Normally we can hide the table after an existing unconditional
17443 branch so that there is no interruption of the flow, but in the
17444 worst case the code looks like this:
17462 We fix this by performing a scan after scheduling, which notices
17463 which instructions need to have their operands fetched from the
17464 constant table and builds the table.
17466 The algorithm starts by building a table of all the constants that
17467 need fixing up and all the natural barriers in the function (places
17468 where a constant table can be dropped without breaking the flow).
17469 For each fixup we note how far the pc-relative replacement will be
17470 able to reach and the offset of the instruction into the function.
17472 Having built the table we then group the fixes together to form
17473 tables that are as large as possible (subject to addressing
17474 constraints) and emit each table of constants after the last
17475 barrier that is within range of all the instructions in the group.
17476 If a group does not contain a barrier, then we forcibly create one
17477 by inserting a jump instruction into the flow. Once the table has
17478 been inserted, the insns are then modified to reference the
17479 relevant entry in the pool.
17481 Possible enhancements to the algorithm (not implemented) are:
17483 1) For some processors and object formats, there may be benefit in
17484 aligning the pools to the start of cache lines; this alignment
17485 would need to be taken into account when calculating addressability
17488 /* These typedefs are located at the start of this file, so that
17489 they can be used in the prototypes there. This comment is to
17490 remind readers of that fact so that the following structures
17491 can be understood more easily.
17493 typedef struct minipool_node Mnode;
17494 typedef struct minipool_fixup Mfix; */
17496 struct minipool_node
17498 /* Doubly linked chain of entries. */
17501 /* The maximum offset into the code that this entry can be placed. While
17502 pushing fixes for forward references, all entries are sorted in order
17503 of increasing max_address. */
17504 HOST_WIDE_INT max_address
;
17505 /* Similarly for an entry inserted for a backwards ref. */
17506 HOST_WIDE_INT min_address
;
17507 /* The number of fixes referencing this entry. This can become zero
17508 if we "unpush" an entry. In this case we ignore the entry when we
17509 come to emit the code. */
17511 /* The offset from the start of the minipool. */
17512 HOST_WIDE_INT offset
;
17513 /* The value in table. */
17515 /* The mode of value. */
17517 /* The size of the value. With iWMMXt enabled
17518 sizes > 4 also imply an alignment of 8-bytes. */
17522 struct minipool_fixup
17526 HOST_WIDE_INT address
;
17532 HOST_WIDE_INT forwards
;
17533 HOST_WIDE_INT backwards
;
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17540 static Mnode
* minipool_vector_head
;
17541 static Mnode
* minipool_vector_tail
;
17542 static rtx_code_label
*minipool_vector_label
;
17543 static int minipool_pad
;
17545 /* The linked list of all minipool fixes required for this function. */
17546 Mfix
* minipool_fix_head
;
17547 Mfix
* minipool_fix_tail
;
17548 /* The fix entry for the current minipool, once it has been placed. */
17549 Mfix
* minipool_barrier
;
/* Default: jump tables live in the read-only data section unless the
   target overrides this.  The guard was missing its #endif.  */
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
17555 static HOST_WIDE_INT
17556 get_jump_table_size (rtx_jump_table_data
*insn
)
17558 /* ADDR_VECs only take room if read-only data does into the text
17560 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
17562 rtx body
= PATTERN (insn
);
17563 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
17564 HOST_WIDE_INT size
;
17565 HOST_WIDE_INT modesize
;
17567 modesize
= GET_MODE_SIZE (GET_MODE (body
));
17568 size
= modesize
* XVECLEN (body
, elt
);
17572 /* Round up size of TBB table to a halfword boundary. */
17573 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
17576 /* No padding necessary for TBH. */
17579 /* Add two bytes for alignment on Thumb. */
17584 gcc_unreachable ();
17592 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17593 function descriptor) into a register and the GOT address into the
17594 FDPIC register, returning an rtx for the register holding the
17595 function address. */
17598 arm_load_function_descriptor (rtx funcdesc
)
17600 rtx fnaddr_reg
= gen_reg_rtx (Pmode
);
17601 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
17602 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
17603 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
17605 emit_move_insn (fnaddr_reg
, fnaddr
);
17607 /* The ABI requires the entry point address to be loaded first, but
17608 since we cannot support lazy binding for lack of atomic load of
17609 two 32-bits values, we do not need to bother to prevent the
17610 previous load from being moved after that of the GOT address. */
17611 emit_insn (gen_restore_pic_register_after_call (pic_reg
, gotaddr
));
17616 /* Return the maximum amount of padding that will be inserted before
17618 static HOST_WIDE_INT
17619 get_label_padding (rtx label
)
17621 HOST_WIDE_INT align
, min_insn_size
;
17623 align
= 1 << label_to_alignment (label
).levels
[0].log
;
17624 min_insn_size
= TARGET_THUMB
? 2 : 4;
17625 return align
> min_insn_size
? align
- min_insn_size
: 0;
17628 /* Move a minipool fix MP from its current location to before MAX_MP.
17629 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17630 constraints may need updating. */
17632 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
17633 HOST_WIDE_INT max_address
)
17635 /* The code below assumes these are different. */
17636 gcc_assert (mp
!= max_mp
);
17638 if (max_mp
== NULL
)
17640 if (max_address
< mp
->max_address
)
17641 mp
->max_address
= max_address
;
17645 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17646 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17648 mp
->max_address
= max_address
;
17650 /* Unlink MP from its current position. Since max_mp is non-null,
17651 mp->prev must be non-null. */
17652 mp
->prev
->next
= mp
->next
;
17653 if (mp
->next
!= NULL
)
17654 mp
->next
->prev
= mp
->prev
;
17656 minipool_vector_tail
= mp
->prev
;
17658 /* Re-insert it before MAX_MP. */
17660 mp
->prev
= max_mp
->prev
;
17663 if (mp
->prev
!= NULL
)
17664 mp
->prev
->next
= mp
;
17666 minipool_vector_head
= mp
;
17669 /* Save the new entry. */
17672 /* Scan over the preceding entries and adjust their addresses as
17674 while (mp
->prev
!= NULL
17675 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17677 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17684 /* Add a constant to the minipool for a forward reference. Returns the
17685 node added or NULL if the constant will not fit in this pool. */
17687 add_minipool_forward_ref (Mfix
*fix
)
17689 /* If set, max_mp is the first pool_entry that has a lower
17690 constraint than the one we are trying to add. */
17691 Mnode
* max_mp
= NULL
;
17692 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
17695 /* If the minipool starts before the end of FIX->INSN then this FIX
17696 cannot be placed into the current pool. Furthermore, adding the
17697 new constant pool entry may cause the pool to start FIX_SIZE bytes
17699 if (minipool_vector_head
&&
17700 (fix
->address
+ get_attr_length (fix
->insn
)
17701 >= minipool_vector_head
->max_address
- fix
->fix_size
))
17704 /* Scan the pool to see if a constant with the same value has
17705 already been added. While we are doing this, also note the
17706 location where we must insert the constant if it doesn't already
17708 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17710 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17711 && fix
->mode
== mp
->mode
17712 && (!LABEL_P (fix
->value
)
17713 || (CODE_LABEL_NUMBER (fix
->value
)
17714 == CODE_LABEL_NUMBER (mp
->value
)))
17715 && rtx_equal_p (fix
->value
, mp
->value
))
17717 /* More than one fix references this entry. */
17719 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
17722 /* Note the insertion point if necessary. */
17724 && mp
->max_address
> max_address
)
17727 /* If we are inserting an 8-bytes aligned quantity and
17728 we have not already found an insertion point, then
17729 make sure that all such 8-byte aligned quantities are
17730 placed at the start of the pool. */
17731 if (ARM_DOUBLEWORD_ALIGN
17733 && fix
->fix_size
>= 8
17734 && mp
->fix_size
< 8)
17737 max_address
= mp
->max_address
;
17741 /* The value is not currently in the minipool, so we need to create
17742 a new entry for it. If MAX_MP is NULL, the entry will be put on
17743 the end of the list since the placement is less constrained than
17744 any existing entry. Otherwise, we insert the new fix before
17745 MAX_MP and, if necessary, adjust the constraints on the other
17748 mp
->fix_size
= fix
->fix_size
;
17749 mp
->mode
= fix
->mode
;
17750 mp
->value
= fix
->value
;
17752 /* Not yet required for a backwards ref. */
17753 mp
->min_address
= -65536;
17755 if (max_mp
== NULL
)
17757 mp
->max_address
= max_address
;
17759 mp
->prev
= minipool_vector_tail
;
17761 if (mp
->prev
== NULL
)
17763 minipool_vector_head
= mp
;
17764 minipool_vector_label
= gen_label_rtx ();
17767 mp
->prev
->next
= mp
;
17769 minipool_vector_tail
= mp
;
17773 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17774 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17776 mp
->max_address
= max_address
;
17779 mp
->prev
= max_mp
->prev
;
17781 if (mp
->prev
!= NULL
)
17782 mp
->prev
->next
= mp
;
17784 minipool_vector_head
= mp
;
17787 /* Save the new entry. */
17790 /* Scan over the preceding entries and adjust their addresses as
17792 while (mp
->prev
!= NULL
17793 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17795 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17803 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
17804 HOST_WIDE_INT min_address
)
17806 HOST_WIDE_INT offset
;
17808 /* The code below assumes these are different. */
17809 gcc_assert (mp
!= min_mp
);
17811 if (min_mp
== NULL
)
17813 if (min_address
> mp
->min_address
)
17814 mp
->min_address
= min_address
;
17818 /* We will adjust this below if it is too loose. */
17819 mp
->min_address
= min_address
;
17821 /* Unlink MP from its current position. Since min_mp is non-null,
17822 mp->next must be non-null. */
17823 mp
->next
->prev
= mp
->prev
;
17824 if (mp
->prev
!= NULL
)
17825 mp
->prev
->next
= mp
->next
;
17827 minipool_vector_head
= mp
->next
;
17829 /* Reinsert it after MIN_MP. */
17831 mp
->next
= min_mp
->next
;
17833 if (mp
->next
!= NULL
)
17834 mp
->next
->prev
= mp
;
17836 minipool_vector_tail
= mp
;
17842 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17844 mp
->offset
= offset
;
17845 if (mp
->refcount
> 0)
17846 offset
+= mp
->fix_size
;
17848 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17849 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17855 /* Add a constant to the minipool for a backward reference. Returns the
17856 node added or NULL if the constant will not fit in this pool.
17858 Note that the code for insertion for a backwards reference can be
17859 somewhat confusing because the calculated offsets for each fix do
17860 not take into account the size of the pool (which is still under
17863 add_minipool_backward_ref (Mfix
*fix
)
17865 /* If set, min_mp is the last pool_entry that has a lower constraint
17866 than the one we are trying to add. */
17867 Mnode
*min_mp
= NULL
;
17868 /* This can be negative, since it is only a constraint. */
17869 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
17872 /* If we can't reach the current pool from this insn, or if we can't
17873 insert this entry at the end of the pool without pushing other
17874 fixes out of range, then we don't try. This ensures that we
17875 can't fail later on. */
17876 if (min_address
>= minipool_barrier
->address
17877 || (minipool_vector_tail
->min_address
+ fix
->fix_size
17878 >= minipool_barrier
->address
))
17881 /* Scan the pool to see if a constant with the same value has
17882 already been added. While we are doing this, also note the
17883 location where we must insert the constant if it doesn't already
17885 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
17887 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17888 && fix
->mode
== mp
->mode
17889 && (!LABEL_P (fix
->value
)
17890 || (CODE_LABEL_NUMBER (fix
->value
)
17891 == CODE_LABEL_NUMBER (mp
->value
)))
17892 && rtx_equal_p (fix
->value
, mp
->value
)
17893 /* Check that there is enough slack to move this entry to the
17894 end of the table (this is conservative). */
17895 && (mp
->max_address
17896 > (minipool_barrier
->address
17897 + minipool_vector_tail
->offset
17898 + minipool_vector_tail
->fix_size
)))
17901 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
17904 if (min_mp
!= NULL
)
17905 mp
->min_address
+= fix
->fix_size
;
17908 /* Note the insertion point if necessary. */
17909 if (mp
->min_address
< min_address
)
17911 /* For now, we do not allow the insertion of 8-byte alignment
17912 requiring nodes anywhere but at the start of the pool. */
17913 if (ARM_DOUBLEWORD_ALIGN
17914 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17919 else if (mp
->max_address
17920 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
17922 /* Inserting before this entry would push the fix beyond
17923 its maximum address (which can happen if we have
17924 re-located a forwards fix); force the new fix to come
17926 if (ARM_DOUBLEWORD_ALIGN
17927 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17932 min_address
= mp
->min_address
+ fix
->fix_size
;
17935 /* Do not insert a non-8-byte aligned quantity before 8-byte
17936 aligned quantities. */
17937 else if (ARM_DOUBLEWORD_ALIGN
17938 && fix
->fix_size
< 8
17939 && mp
->fix_size
>= 8)
17942 min_address
= mp
->min_address
+ fix
->fix_size
;
17947 /* We need to create a new entry. */
17949 mp
->fix_size
= fix
->fix_size
;
17950 mp
->mode
= fix
->mode
;
17951 mp
->value
= fix
->value
;
17953 mp
->max_address
= minipool_barrier
->address
+ 65536;
17955 mp
->min_address
= min_address
;
17957 if (min_mp
== NULL
)
17960 mp
->next
= minipool_vector_head
;
17962 if (mp
->next
== NULL
)
17964 minipool_vector_tail
= mp
;
17965 minipool_vector_label
= gen_label_rtx ();
17968 mp
->next
->prev
= mp
;
17970 minipool_vector_head
= mp
;
17974 mp
->next
= min_mp
->next
;
17978 if (mp
->next
!= NULL
)
17979 mp
->next
->prev
= mp
;
17981 minipool_vector_tail
= mp
;
17984 /* Save the new entry. */
17992 /* Scan over the following entries and adjust their offsets. */
17993 while (mp
->next
!= NULL
)
17995 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17996 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17999 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
18001 mp
->next
->offset
= mp
->offset
;
18010 assign_minipool_offsets (Mfix
*barrier
)
18012 HOST_WIDE_INT offset
= 0;
18015 minipool_barrier
= barrier
;
18017 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
18019 mp
->offset
= offset
;
18021 if (mp
->refcount
> 0)
18022 offset
+= mp
->fix_size
;
18026 /* Output the literal table */
18028 dump_minipool (rtx_insn
*scan
)
18034 if (ARM_DOUBLEWORD_ALIGN
)
18035 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
18036 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
18043 fprintf (dump_file
,
18044 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18045 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
18047 scan
= emit_label_after (gen_label_rtx (), scan
);
18048 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
18049 scan
= emit_label_after (minipool_vector_label
, scan
);
18051 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
18053 if (mp
->refcount
> 0)
18057 fprintf (dump_file
,
18058 ";; Offset %u, min %ld, max %ld ",
18059 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
18060 (unsigned long) mp
->max_address
);
18061 arm_print_value (dump_file
, mp
->value
);
18062 fputc ('\n', dump_file
);
18065 rtx val
= copy_rtx (mp
->value
);
18067 switch (GET_MODE_SIZE (mp
->mode
))
18069 #ifdef HAVE_consttable_1
18071 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
18075 #ifdef HAVE_consttable_2
18077 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
18081 #ifdef HAVE_consttable_4
18083 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
18087 #ifdef HAVE_consttable_8
18089 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
18093 #ifdef HAVE_consttable_16
18095 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
18100 gcc_unreachable ();
18108 minipool_vector_head
= minipool_vector_tail
= NULL
;
18109 scan
= emit_insn_after (gen_consttable_end (), scan
);
18110 scan
= emit_barrier_after (scan
);
18113 /* Return the cost of forcibly inserting a barrier after INSN. */
18115 arm_barrier_cost (rtx_insn
*insn
)
18117 /* Basing the location of the pool on the loop depth is preferable,
18118 but at the moment, the basic block information seems to be
18119 corrupt by this stage of the compilation. */
18120 int base_cost
= 50;
18121 rtx_insn
*next
= next_nonnote_insn (insn
);
18123 if (next
!= NULL
&& LABEL_P (next
))
18126 switch (GET_CODE (insn
))
18129 /* It will always be better to place the table before the label, rather
18138 return base_cost
- 10;
18141 return base_cost
+ 10;
18145 /* Find the best place in the insn stream in the range
18146 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18147 Create the barrier by inserting a jump and add a new fix entry for
18150 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
18152 HOST_WIDE_INT count
= 0;
18153 rtx_barrier
*barrier
;
18154 rtx_insn
*from
= fix
->insn
;
18155 /* The instruction after which we will insert the jump. */
18156 rtx_insn
*selected
= NULL
;
18158 /* The address at which the jump instruction will be placed. */
18159 HOST_WIDE_INT selected_address
;
18161 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
18162 rtx_code_label
*label
= gen_label_rtx ();
18164 selected_cost
= arm_barrier_cost (from
);
18165 selected_address
= fix
->address
;
18167 while (from
&& count
< max_count
)
18169 rtx_jump_table_data
*tmp
;
18172 /* This code shouldn't have been called if there was a natural barrier
18174 gcc_assert (!BARRIER_P (from
));
18176 /* Count the length of this insn. This must stay in sync with the
18177 code that pushes minipool fixes. */
18178 if (LABEL_P (from
))
18179 count
+= get_label_padding (from
);
18181 count
+= get_attr_length (from
);
18183 /* If there is a jump table, add its length. */
18184 if (tablejump_p (from
, NULL
, &tmp
))
18186 count
+= get_jump_table_size (tmp
);
18188 /* Jump tables aren't in a basic block, so base the cost on
18189 the dispatch insn. If we select this location, we will
18190 still put the pool after the table. */
18191 new_cost
= arm_barrier_cost (from
);
18193 if (count
< max_count
18194 && (!selected
|| new_cost
<= selected_cost
))
18197 selected_cost
= new_cost
;
18198 selected_address
= fix
->address
+ count
;
18201 /* Continue after the dispatch table. */
18202 from
= NEXT_INSN (tmp
);
18206 new_cost
= arm_barrier_cost (from
);
18208 if (count
< max_count
18209 && (!selected
|| new_cost
<= selected_cost
))
18212 selected_cost
= new_cost
;
18213 selected_address
= fix
->address
+ count
;
18216 from
= NEXT_INSN (from
);
18219 /* Make sure that we found a place to insert the jump. */
18220 gcc_assert (selected
);
18222 /* Create a new JUMP_INSN that branches around a barrier. */
18223 from
= emit_jump_insn_after (gen_jump (label
), selected
);
18224 JUMP_LABEL (from
) = label
;
18225 barrier
= emit_barrier_after (from
);
18226 emit_label_after (label
, barrier
);
18228 /* Create a minipool barrier entry for the new barrier. */
18229 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
18230 new_fix
->insn
= barrier
;
18231 new_fix
->address
= selected_address
;
18232 new_fix
->next
= fix
->next
;
18233 fix
->next
= new_fix
;
18238 /* Record that there is a natural barrier in the insn stream at
18241 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
18243 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18246 fix
->address
= address
;
18249 if (minipool_fix_head
!= NULL
)
18250 minipool_fix_tail
->next
= fix
;
18252 minipool_fix_head
= fix
;
18254 minipool_fix_tail
= fix
;
18257 /* Record INSN, which will need fixing up to load a value from the
18258 minipool. ADDRESS is the offset of the insn since the start of the
18259 function; LOC is a pointer to the part of the insn which requires
18260 fixing; VALUE is the constant that must be loaded, which is of type
18263 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
18264 machine_mode mode
, rtx value
)
18266 gcc_assert (!arm_disable_literal_pool
);
18267 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18270 fix
->address
= address
;
18273 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
18274 fix
->value
= value
;
18275 fix
->forwards
= get_attr_pool_range (insn
);
18276 fix
->backwards
= get_attr_neg_pool_range (insn
);
18277 fix
->minipool
= NULL
;
18279 /* If an insn doesn't have a range defined for it, then it isn't
18280 expecting to be reworked by this code. Better to stop now than
18281 to generate duff assembly code. */
18282 gcc_assert (fix
->forwards
|| fix
->backwards
);
18284 /* If an entry requires 8-byte alignment then assume all constant pools
18285 require 4 bytes of padding. Trying to do this later on a per-pool
18286 basis is awkward because existing pool entries have to be modified. */
18287 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
18292 fprintf (dump_file
,
18293 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18294 GET_MODE_NAME (mode
),
18295 INSN_UID (insn
), (unsigned long) address
,
18296 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
18297 arm_print_value (dump_file
, fix
->value
);
18298 fprintf (dump_file
, "\n");
18301 /* Add it to the chain of fixes. */
18304 if (minipool_fix_head
!= NULL
)
18305 minipool_fix_tail
->next
= fix
;
18307 minipool_fix_head
= fix
;
18309 minipool_fix_tail
= fix
;
18312 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
18313 Returns the number of insns needed, or 99 if we always want to synthesize
18316 arm_max_const_double_inline_cost ()
18318 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
18321 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18322 Returns the number of insns needed, or 99 if we don't know how to
18325 arm_const_double_inline_cost (rtx val
)
18327 rtx lowpart
, highpart
;
18330 mode
= GET_MODE (val
);
18332 if (mode
== VOIDmode
)
18335 gcc_assert (GET_MODE_SIZE (mode
) == 8);
18337 lowpart
= gen_lowpart (SImode
, val
);
18338 highpart
= gen_highpart_mode (SImode
, mode
, val
);
18340 gcc_assert (CONST_INT_P (lowpart
));
18341 gcc_assert (CONST_INT_P (highpart
));
18343 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
18344 NULL_RTX
, NULL_RTX
, 0, 0)
18345 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
18346 NULL_RTX
, NULL_RTX
, 0, 0));
18349 /* Cost of loading a SImode constant. */
18351 arm_const_inline_cost (enum rtx_code code
, rtx val
)
18353 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
18354 NULL_RTX
, NULL_RTX
, 1, 0);
18357 /* Return true if it is worthwhile to split a 64-bit constant into two
18358 32-bit operations. This is the case if optimizing for size, or
18359 if we have load delay slots, or if one 32-bit part can be done with
18360 a single data operation. */
18362 arm_const_double_by_parts (rtx val
)
18364 machine_mode mode
= GET_MODE (val
);
18367 if (optimize_size
|| arm_ld_sched
)
18370 if (mode
== VOIDmode
)
18373 part
= gen_highpart_mode (SImode
, mode
, val
);
18375 gcc_assert (CONST_INT_P (part
));
18377 if (const_ok_for_arm (INTVAL (part
))
18378 || const_ok_for_arm (~INTVAL (part
)))
18381 part
= gen_lowpart (SImode
, val
);
18383 gcc_assert (CONST_INT_P (part
));
18385 if (const_ok_for_arm (INTVAL (part
))
18386 || const_ok_for_arm (~INTVAL (part
)))
18392 /* Return true if it is possible to inline both the high and low parts
18393 of a 64-bit constant into 32-bit data processing instructions. */
18395 arm_const_double_by_immediates (rtx val
)
18397 machine_mode mode
= GET_MODE (val
);
18400 if (mode
== VOIDmode
)
18403 part
= gen_highpart_mode (SImode
, mode
, val
);
18405 gcc_assert (CONST_INT_P (part
));
18407 if (!const_ok_for_arm (INTVAL (part
)))
18410 part
= gen_lowpart (SImode
, val
);
18412 gcc_assert (CONST_INT_P (part
));
18414 if (!const_ok_for_arm (INTVAL (part
)))
18420 /* Scan INSN and note any of its operands that need fixing.
18421 If DO_PUSHES is false we do not actually push any of the fixups
18424 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
18428 extract_constrain_insn (insn
);
18430 if (recog_data
.n_alternatives
== 0)
18433 /* Fill in recog_op_alt with information about the constraints of
18435 preprocess_constraints (insn
);
18437 const operand_alternative
*op_alt
= which_op_alt ();
18438 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
18440 /* Things we need to fix can only occur in inputs. */
18441 if (recog_data
.operand_type
[opno
] != OP_IN
)
18444 /* If this alternative is a memory reference, then any mention
18445 of constants in this alternative is really to fool reload
18446 into allowing us to accept one there. We need to fix them up
18447 now so that we output the right code. */
18448 if (op_alt
[opno
].memory_ok
)
18450 rtx op
= recog_data
.operand
[opno
];
18452 if (CONSTANT_P (op
))
18455 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
18456 recog_data
.operand_mode
[opno
], op
);
18458 else if (MEM_P (op
)
18459 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
18460 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
18464 rtx cop
= avoid_constant_pool_reference (op
);
18466 /* Casting the address of something to a mode narrower
18467 than a word can cause avoid_constant_pool_reference()
18468 to return the pool reference itself. That's no good to
18469 us here. Lets just hope that we can use the
18470 constant pool value directly. */
18472 cop
= get_pool_constant (XEXP (op
, 0));
18474 push_minipool_fix (insn
, address
,
18475 recog_data
.operand_loc
[opno
],
18476 recog_data
.operand_mode
[opno
], cop
);
18486 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18487 and unions in the context of ARMv8-M Security Extensions. It is used as a
18488 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18489 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18490 or four masks, depending on whether it is being computed for a
18491 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18492 respectively. The tree for the type of the argument or a field within an
18493 argument is passed in ARG_TYPE, the current register this argument or field
18494 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18495 argument or field starts at is passed in STARTING_BIT and the last used bit
18496 is kept in LAST_USED_BIT which is also updated accordingly. */
18498 static unsigned HOST_WIDE_INT
18499 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
18500 uint32_t * padding_bits_to_clear
,
18501 unsigned starting_bit
, int * last_used_bit
)
18504 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
18506 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
18508 unsigned current_bit
= starting_bit
;
18510 long int offset
, size
;
18513 field
= TYPE_FIELDS (arg_type
);
18516 /* The offset within a structure is always an offset from
18517 the start of that structure. Make sure we take that into the
18518 calculation of the register based offset that we use here. */
18519 offset
= starting_bit
;
18520 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
18523 /* This is the actual size of the field, for bitfields this is the
18524 bitfield width and not the container size. */
18525 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
18527 if (*last_used_bit
!= offset
)
18529 if (offset
< *last_used_bit
)
18531 /* This field's offset is before the 'last_used_bit', that
18532 means this field goes on the next register. So we need to
18533 pad the rest of the current register and increase the
18534 register number. */
18536 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
18539 padding_bits_to_clear
[*regno
] |= mask
;
18540 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18545 /* Otherwise we pad the bits between the last field's end and
18546 the start of the new field. */
18549 mask
= ((uint32_t)-1) >> (32 - offset
);
18550 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
18551 padding_bits_to_clear
[*regno
] |= mask
;
18553 current_bit
= offset
;
18556 /* Calculate further padding bits for inner structs/unions too. */
18557 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
18559 *last_used_bit
= current_bit
;
18560 not_to_clear_reg_mask
18561 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
18562 padding_bits_to_clear
, offset
,
18567 /* Update 'current_bit' with this field's size. If the
18568 'current_bit' lies in a subsequent register, update 'regno' and
18569 reset 'current_bit' to point to the current bit in that new
18571 current_bit
+= size
;
18572 while (current_bit
>= 32)
18575 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18578 *last_used_bit
= current_bit
;
18581 field
= TREE_CHAIN (field
);
18583 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18585 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
18587 tree field
, field_t
;
18588 int i
, regno_t
, field_size
;
18592 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
18593 = {-1, -1, -1, -1};
18595 /* To compute the padding bits in a union we only consider bits as
18596 padding bits if they are always either a padding bit or fall outside a
18597 fields size for all fields in the union. */
18598 field
= TYPE_FIELDS (arg_type
);
18601 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
18602 = {0U, 0U, 0U, 0U};
18603 int last_used_bit_t
= *last_used_bit
;
18605 field_t
= TREE_TYPE (field
);
18607 /* If the field's type is either a record or a union make sure to
18608 compute their padding bits too. */
18609 if (RECORD_OR_UNION_TYPE_P (field_t
))
18610 not_to_clear_reg_mask
18611 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
18612 &padding_bits_to_clear_t
[0],
18613 starting_bit
, &last_used_bit_t
);
18616 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
18617 regno_t
= (field_size
/ 32) + *regno
;
18618 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
18621 for (i
= *regno
; i
< regno_t
; i
++)
18623 /* For all but the last register used by this field only keep the
18624 padding bits that were padding bits in this field. */
18625 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
18628 /* For the last register, keep all padding bits that were padding
18629 bits in this field and any padding bits that are still valid
18630 as padding bits but fall outside of this field's size. */
18631 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
18632 padding_bits_to_clear_res
[regno_t
]
18633 &= padding_bits_to_clear_t
[regno_t
] | mask
;
18635 /* Update the maximum size of the fields in terms of registers used
18636 ('max_reg') and the 'last_used_bit' in said register. */
18637 if (max_reg
< regno_t
)
18640 max_bit
= last_used_bit_t
;
18642 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
18643 max_bit
= last_used_bit_t
;
18645 field
= TREE_CHAIN (field
);
18648 /* Update the current padding_bits_to_clear using the intersection of the
18649 padding bits of all the fields. */
18650 for (i
=*regno
; i
< max_reg
; i
++)
18651 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
18653 /* Do not keep trailing padding bits, we do not know yet whether this
18654 is the end of the argument. */
18655 mask
= ((uint32_t) 1 << max_bit
) - 1;
18656 padding_bits_to_clear
[max_reg
]
18657 |= padding_bits_to_clear_res
[max_reg
] & mask
;
18660 *last_used_bit
= max_bit
;
18663 /* This function should only be used for structs and unions. */
18664 gcc_unreachable ();
18666 return not_to_clear_reg_mask
;
18669 /* In the context of ARMv8-M Security Extensions, this function is used for both
18670 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18671 registers are used when returning or passing arguments, which is then
18672 returned as a mask. It will also compute a mask to indicate padding/unused
18673 bits for each of these registers, and passes this through the
18674 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18675 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18676 the starting register used to pass this argument or return value is passed
18677 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18678 for struct and union types. */
18680 static unsigned HOST_WIDE_INT
18681 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
18682 uint32_t * padding_bits_to_clear
)
18685 int last_used_bit
= 0;
18686 unsigned HOST_WIDE_INT not_to_clear_mask
;
18688 if (RECORD_OR_UNION_TYPE_P (arg_type
))
18691 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
18692 padding_bits_to_clear
, 0,
18696 /* If the 'last_used_bit' is not zero, that means we are still using a
18697 part of the last 'regno'. In such cases we must clear the trailing
18698 bits. Otherwise we are not using regno and we should mark it as to
18700 if (last_used_bit
!= 0)
18701 padding_bits_to_clear
[regno
]
18702 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
18704 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
18708 not_to_clear_mask
= 0;
18709 /* We are not dealing with structs nor unions. So these arguments may be
18710 passed in floating point registers too. In some cases a BLKmode is
18711 used when returning or passing arguments in multiple VFP registers. */
18712 if (GET_MODE (arg_rtx
) == BLKmode
)
18717 /* This should really only occur when dealing with the hard-float
18719 gcc_assert (TARGET_HARD_FLOAT_ABI
);
18721 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
18723 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
18724 gcc_assert (REG_P (reg
));
18726 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
18728 /* If we are dealing with DF mode, make sure we don't
18729 clear either of the registers it addresses. */
18730 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
18733 unsigned HOST_WIDE_INT mask
;
18734 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
18735 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
18736 not_to_clear_mask
|= mask
;
18742 /* Otherwise we can rely on the MODE to determine how many registers
18743 are being used by this argument. */
18744 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
18745 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18748 unsigned HOST_WIDE_INT
18749 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
18750 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18751 not_to_clear_mask
|= mask
;
18756 return not_to_clear_mask
;
18759 /* Clear registers secret before doing a cmse_nonsecure_call or returning from
18760 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18761 are to be fully cleared, using the value in register CLEARING_REG if more
18762 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18763 the bits that needs to be cleared in caller-saved core registers, with
18764 SCRATCH_REG used as a scratch register for that clearing.
18766 NOTE: one of three following assertions must hold:
18767 - SCRATCH_REG is a low register
18768 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18769 in TO_CLEAR_BITMAP)
18770 - CLEARING_REG is a low register. */
18773 cmse_clear_registers (sbitmap to_clear_bitmap
, uint32_t *padding_bits_to_clear
,
18774 int padding_bits_len
, rtx scratch_reg
, rtx clearing_reg
)
18776 bool saved_clearing
= false;
18777 rtx saved_clearing_reg
= NULL_RTX
;
18778 int i
, regno
, clearing_regno
, minregno
= R0_REGNUM
, maxregno
= minregno
- 1;
18780 gcc_assert (arm_arch_cmse
);
18782 if (!bitmap_empty_p (to_clear_bitmap
))
18784 minregno
= bitmap_first_set_bit (to_clear_bitmap
);
18785 maxregno
= bitmap_last_set_bit (to_clear_bitmap
);
18787 clearing_regno
= REGNO (clearing_reg
);
18789 /* Clear padding bits. */
18790 gcc_assert (padding_bits_len
<= NUM_ARG_REGS
);
18791 for (i
= 0, regno
= R0_REGNUM
; i
< padding_bits_len
; i
++, regno
++)
18794 rtx rtx16
, dest
, cleared_reg
= gen_rtx_REG (SImode
, regno
);
18796 if (padding_bits_to_clear
[i
] == 0)
18799 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18800 CLEARING_REG as scratch. */
18802 && REGNO (scratch_reg
) > LAST_LO_REGNUM
)
18804 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18805 such that we can use clearing_reg to clear the unused bits in the
18807 if ((clearing_regno
> maxregno
18808 || !bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18809 && !saved_clearing
)
18811 gcc_assert (clearing_regno
<= LAST_LO_REGNUM
);
18812 emit_move_insn (scratch_reg
, clearing_reg
);
18813 saved_clearing
= true;
18814 saved_clearing_reg
= scratch_reg
;
18816 scratch_reg
= clearing_reg
;
18819 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18820 mask
= (~padding_bits_to_clear
[i
]) & 0xFFFF;
18821 emit_move_insn (scratch_reg
, gen_int_mode (mask
, SImode
));
18823 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18824 mask
= (~padding_bits_to_clear
[i
]) >> 16;
18825 rtx16
= gen_int_mode (16, SImode
);
18826 dest
= gen_rtx_ZERO_EXTRACT (SImode
, scratch_reg
, rtx16
, rtx16
);
18828 emit_insn (gen_rtx_SET (dest
, gen_int_mode (mask
, SImode
)));
18830 emit_insn (gen_andsi3 (cleared_reg
, cleared_reg
, scratch_reg
));
18832 if (saved_clearing
)
18833 emit_move_insn (clearing_reg
, saved_clearing_reg
);
18836 /* Clear full registers. */
18838 if (TARGET_HAVE_FPCXT_CMSE
)
18841 int i
, j
, k
, nb_regs
;
18842 rtx use_seq
, par
, reg
, set
, vunspec
;
18843 int to_clear_bitmap_size
= SBITMAP_SIZE (to_clear_bitmap
);
18844 auto_sbitmap
core_regs_bitmap (to_clear_bitmap_size
);
18845 auto_sbitmap
to_clear_core_bitmap (to_clear_bitmap_size
);
18847 for (i
= FIRST_VFP_REGNUM
; i
<= maxregno
; i
+= nb_regs
)
18849 /* Find next register to clear and exit if none. */
18850 for (; i
<= maxregno
&& !bitmap_bit_p (to_clear_bitmap
, i
); i
++);
18854 /* Compute number of consecutive registers to clear. */
18855 for (j
= i
; j
<= maxregno
&& bitmap_bit_p (to_clear_bitmap
, j
);
18859 /* Create VSCCLRM RTX pattern. */
18860 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 1));
18861 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18862 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18863 VUNSPEC_VSCCLRM_VPR
);
18864 XVECEXP (par
, 0, 0) = vunspec
;
18866 /* Insert VFP register clearing RTX in the pattern. */
18868 for (k
= 1, j
= i
; j
<= maxregno
&& k
< nb_regs
+ 1; j
++)
18870 if (!bitmap_bit_p (to_clear_bitmap
, j
))
18873 reg
= gen_rtx_REG (SFmode
, j
);
18874 set
= gen_rtx_SET (reg
, const0_rtx
);
18875 XVECEXP (par
, 0, k
++) = set
;
18878 use_seq
= get_insns ();
18881 emit_insn_after (use_seq
, emit_insn (par
));
18884 /* Get set of core registers to clear. */
18885 bitmap_clear (core_regs_bitmap
);
18886 bitmap_set_range (core_regs_bitmap
, R0_REGNUM
,
18887 IP_REGNUM
- R0_REGNUM
+ 1);
18888 bitmap_and (to_clear_core_bitmap
, to_clear_bitmap
,
18890 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap
));
18892 if (bitmap_empty_p (to_clear_core_bitmap
))
18895 /* Create clrm RTX pattern. */
18896 nb_regs
= bitmap_count_bits (to_clear_core_bitmap
);
18897 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 2));
18899 /* Insert core register clearing RTX in the pattern. */
18901 for (j
= 0, i
= minregno
; j
< nb_regs
; i
++)
18903 if (!bitmap_bit_p (to_clear_core_bitmap
, i
))
18906 reg
= gen_rtx_REG (SImode
, i
);
18907 set
= gen_rtx_SET (reg
, const0_rtx
);
18908 XVECEXP (par
, 0, j
++) = set
;
18912 /* Insert APSR register clearing RTX in the pattern
18913 * along with clobbering CC. */
18914 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18915 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18916 VUNSPEC_CLRM_APSR
);
18918 XVECEXP (par
, 0, j
++) = vunspec
;
18920 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
18921 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
18922 XVECEXP (par
, 0, j
) = clobber
;
18924 use_seq
= get_insns ();
18927 emit_insn_after (use_seq
, emit_insn (par
));
18931 /* If not marked for clearing, clearing_reg already does not contain
18933 if (clearing_regno
<= maxregno
18934 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18936 emit_move_insn (clearing_reg
, const0_rtx
);
18937 emit_use (clearing_reg
);
18938 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
18941 for (regno
= minregno
; regno
<= maxregno
; regno
++)
18943 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
18946 if (IS_VFP_REGNUM (regno
))
18948 /* If regno is an even vfp register and its successor is also to
18949 be cleared, use vmov. */
18950 if (TARGET_VFP_DOUBLE
18951 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
18952 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
18954 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
18955 CONST1_RTX (DFmode
));
18956 emit_use (gen_rtx_REG (DFmode
, regno
));
18961 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
18962 CONST1_RTX (SFmode
));
18963 emit_use (gen_rtx_REG (SFmode
, regno
));
18968 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
18969 emit_use (gen_rtx_REG (SImode
, regno
));
18975 /* Clear core and caller-saved VFP registers not used to pass arguments before
18976 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18977 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18978 libgcc/config/arm/cmse_nonsecure_call.S. */
18981 cmse_nonsecure_call_inline_register_clear (void)
18985 FOR_EACH_BB_FN (bb
, cfun
)
18989 FOR_BB_INSNS (bb
, insn
)
18991 bool clear_callee_saved
= TARGET_HAVE_FPCXT_CMSE
;
18992 /* frame = VFP regs + FPSCR + VPR. */
18993 unsigned lazy_store_stack_frame_size
18994 = (LAST_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1 + 2) * UNITS_PER_WORD
;
18995 unsigned long callee_saved_mask
18996 = ((1 << (LAST_HI_REGNUM
+ 1)) - 1)
18997 & ~((1 << (LAST_ARG_REGNUM
+ 1)) - 1);
18998 unsigned address_regnum
, regno
;
18999 unsigned max_int_regno
19000 = clear_callee_saved
? IP_REGNUM
: LAST_ARG_REGNUM
;
19001 unsigned max_fp_regno
19002 = TARGET_HAVE_FPCXT_CMSE
? LAST_VFP_REGNUM
: D7_VFP_REGNUM
;
19004 = TARGET_HARD_FLOAT_ABI
? max_fp_regno
: max_int_regno
;
19005 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
19007 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
19009 CUMULATIVE_ARGS args_so_far_v
;
19010 cumulative_args_t args_so_far
;
19011 tree arg_type
, fntype
;
19012 bool first_param
= true, lazy_fpclear
= !TARGET_HARD_FLOAT_ABI
;
19013 function_args_iterator args_iter
;
19014 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
19016 if (!NONDEBUG_INSN_P (insn
))
19019 if (!CALL_P (insn
))
19022 pat
= PATTERN (insn
);
19023 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
19024 call
= XVECEXP (pat
, 0, 0);
19026 /* Get the real call RTX if the insn sets a value, ie. returns. */
19027 if (GET_CODE (call
) == SET
)
19028 call
= SET_SRC (call
);
19030 /* Check if it is a cmse_nonsecure_call. */
19031 unspec
= XEXP (call
, 0);
19032 if (GET_CODE (unspec
) != UNSPEC
19033 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
19036 /* Mark registers that needs to be cleared. Those that holds a
19037 parameter are removed from the set further below. */
19038 bitmap_clear (to_clear_bitmap
);
19039 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
,
19040 max_int_regno
- R0_REGNUM
+ 1);
19042 /* Only look at the caller-saved floating point registers in case of
19043 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19044 lazy store and loads which clear both caller- and callee-saved
19048 auto_sbitmap
float_bitmap (maxregno
+ 1);
19050 bitmap_clear (float_bitmap
);
19051 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
19052 max_fp_regno
- FIRST_VFP_REGNUM
+ 1);
19053 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
19056 /* Make sure the register used to hold the function address is not
19058 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
19059 gcc_assert (MEM_P (address
));
19060 gcc_assert (REG_P (XEXP (address
, 0)));
19061 address_regnum
= REGNO (XEXP (address
, 0));
19062 if (address_regnum
<= max_int_regno
)
19063 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
19065 /* Set basic block of call insn so that df rescan is performed on
19066 insns inserted here. */
19067 set_block_for_insn (insn
, bb
);
19068 df_set_flags (DF_DEFER_INSN_RESCAN
);
19071 /* Make sure the scheduler doesn't schedule other insns beyond
19073 emit_insn (gen_blockage ());
19075 /* Walk through all arguments and clear registers appropriately.
19077 fntype
= TREE_TYPE (MEM_EXPR (address
));
19078 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
19080 args_so_far
= pack_cumulative_args (&args_so_far_v
);
19081 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
19084 uint64_t to_clear_args_mask
;
19086 if (VOID_TYPE_P (arg_type
))
19089 function_arg_info
arg (arg_type
, /*named=*/true);
19091 /* ??? We should advance after processing the argument and pass
19092 the argument we're advancing past. */
19093 arm_function_arg_advance (args_so_far
, arg
);
19095 arg_rtx
= arm_function_arg (args_so_far
, arg
);
19096 gcc_assert (REG_P (arg_rtx
));
19098 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
19100 &padding_bits_to_clear
[0]);
19101 if (to_clear_args_mask
)
19103 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
19105 if (to_clear_args_mask
& (1ULL << regno
))
19106 bitmap_clear_bit (to_clear_bitmap
, regno
);
19110 first_param
= false;
19113 /* We use right shift and left shift to clear the LSB of the address
19114 we jump to instead of using bic, to avoid having to use an extra
19115 register on Thumb-1. */
19116 clearing_reg
= XEXP (address
, 0);
19117 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
19118 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19119 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
19120 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19122 if (clear_callee_saved
)
19125 emit_multi_reg_push (callee_saved_mask
, callee_saved_mask
);
19126 /* Disable frame debug info in push because it needs to be
19127 disabled for pop (see below). */
19128 RTX_FRAME_RELATED_P (push_insn
) = 0;
19130 /* Lazy store multiple. */
19134 rtx_insn
*add_insn
;
19136 imm
= gen_int_mode (- lazy_store_stack_frame_size
, SImode
);
19137 add_insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
19138 stack_pointer_rtx
, imm
));
19139 /* If we have the frame pointer, then it will be the
19140 CFA reg. Otherwise, the stack pointer is the CFA
19141 reg, so we need to emit a CFA adjust. */
19142 if (!frame_pointer_needed
)
19143 arm_add_cfa_adjust_cfa_note (add_insn
,
19144 - lazy_store_stack_frame_size
,
19146 stack_pointer_rtx
);
19147 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx
));
19149 /* Save VFP callee-saved registers. */
19152 vfp_emit_fstmd (D7_VFP_REGNUM
+ 1,
19153 (max_fp_regno
- D7_VFP_REGNUM
) / 2);
19154 /* Disable frame debug info in push because it needs to be
19155 disabled for vpop (see below). */
19156 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19160 /* Clear caller-saved registers that leak before doing a non-secure
19162 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
19163 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
19164 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
19166 seq
= get_insns ();
19168 emit_insn_before (seq
, insn
);
19170 if (TARGET_HAVE_FPCXT_CMSE
)
19172 rtx_insn
*last
, *pop_insn
, *after
= insn
;
19176 /* Lazy load multiple done as part of libcall in Armv8-M. */
19179 rtx imm
= gen_int_mode (lazy_store_stack_frame_size
, SImode
);
19180 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx
));
19181 rtx_insn
*add_insn
=
19182 emit_insn (gen_addsi3 (stack_pointer_rtx
,
19183 stack_pointer_rtx
, imm
));
19184 if (!frame_pointer_needed
)
19185 arm_add_cfa_adjust_cfa_note (add_insn
,
19186 lazy_store_stack_frame_size
,
19188 stack_pointer_rtx
);
19190 /* Restore VFP callee-saved registers. */
19193 int nb_callee_saved_vfp_regs
=
19194 (max_fp_regno
- D7_VFP_REGNUM
) / 2;
19195 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM
+ 1,
19196 nb_callee_saved_vfp_regs
,
19197 stack_pointer_rtx
);
19198 /* Disable frame debug info in vpop because the SP adjustment
19199 is made using a CFA adjustment note while CFA used is
19200 sometimes R7. This then causes an assert failure in the
19201 CFI note creation code. */
19202 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19205 arm_emit_multi_reg_pop (callee_saved_mask
);
19206 pop_insn
= get_last_insn ();
19208 /* Disable frame debug info in pop because they reset the state
19209 of popped registers to what it was at the beginning of the
19210 function, before the prologue. This leads to incorrect state
19211 when doing the pop after the nonsecure call for registers that
19212 are pushed both in prologue and before the nonsecure call.
19214 It also occasionally triggers an assert failure in CFI note
19215 creation code when there are two codepaths to the epilogue,
19216 one of which does not go through the nonsecure call.
19217 Obviously this mean that debugging between the push and pop is
19219 RTX_FRAME_RELATED_P (pop_insn
) = 0;
19221 seq
= get_insns ();
19222 last
= get_last_insn ();
19225 emit_insn_after (seq
, after
);
19227 /* Skip pop we have just inserted after nonsecure call, we know
19228 it does not contain a nonsecure call. */
19235 /* Rewrite move insn into subtract of 0 if the condition codes will
19236 be useful in next conditional jump insn. */
19239 thumb1_reorg (void)
19243 FOR_EACH_BB_FN (bb
, cfun
)
19246 rtx cmp
, op0
, op1
, set
= NULL
;
19247 rtx_insn
*prev
, *insn
= BB_END (bb
);
19248 bool insn_clobbered
= false;
19250 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
19251 insn
= PREV_INSN (insn
);
19253 /* Find the last cbranchsi4_insn in basic block BB. */
19254 if (insn
== BB_HEAD (bb
)
19255 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
19258 /* Get the register with which we are comparing. */
19259 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
19260 op0
= XEXP (cmp
, 0);
19261 op1
= XEXP (cmp
, 1);
19263 /* Check that comparison is against ZERO. */
19264 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
19267 /* Find the first flag setting insn before INSN in basic block BB. */
19268 gcc_assert (insn
!= BB_HEAD (bb
));
19269 for (prev
= PREV_INSN (insn
);
19271 && prev
!= BB_HEAD (bb
)
19273 || DEBUG_INSN_P (prev
)
19274 || ((set
= single_set (prev
)) != NULL
19275 && get_attr_conds (prev
) == CONDS_NOCOND
)));
19276 prev
= PREV_INSN (prev
))
19278 if (reg_set_p (op0
, prev
))
19279 insn_clobbered
= true;
19282 /* Skip if op0 is clobbered by insn other than prev. */
19283 if (insn_clobbered
)
19289 dest
= SET_DEST (set
);
19290 src
= SET_SRC (set
);
19291 if (!low_register_operand (dest
, SImode
)
19292 || !low_register_operand (src
, SImode
))
19295 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19296 in INSN. Both src and dest of the move insn are checked. */
19297 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
19299 dest
= copy_rtx (dest
);
19300 src
= copy_rtx (src
);
19301 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
19302 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
19303 INSN_CODE (prev
) = -1;
19304 /* Set test register in INSN to dest. */
19305 XEXP (cmp
, 0) = copy_rtx (dest
);
19306 INSN_CODE (insn
) = -1;
19311 /* Convert instructions to their cc-clobbering variant if possible, since
19312 that allows us to use smaller encodings. */
19315 thumb2_reorg (void)
19320 INIT_REG_SET (&live
);
19322 /* We are freeing block_for_insn in the toplev to keep compatibility
19323 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19324 compute_bb_for_insn ();
19327 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
19329 FOR_EACH_BB_FN (bb
, cfun
)
19331 if ((current_tune
->disparage_flag_setting_t16_encodings
19332 == tune_params::DISPARAGE_FLAGS_ALL
)
19333 && optimize_bb_for_speed_p (bb
))
19337 Convert_Action action
= SKIP
;
19338 Convert_Action action_for_partial_flag_setting
19339 = ((current_tune
->disparage_flag_setting_t16_encodings
19340 != tune_params::DISPARAGE_FLAGS_NEITHER
)
19341 && optimize_bb_for_speed_p (bb
))
19344 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
19345 df_simulate_initialize_backwards (bb
, &live
);
19346 FOR_BB_INSNS_REVERSE (bb
, insn
)
19348 if (NONJUMP_INSN_P (insn
)
19349 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
19350 && GET_CODE (PATTERN (insn
)) == SET
)
19353 rtx pat
= PATTERN (insn
);
19354 rtx dst
= XEXP (pat
, 0);
19355 rtx src
= XEXP (pat
, 1);
19356 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
19358 if (UNARY_P (src
) || BINARY_P (src
))
19359 op0
= XEXP (src
, 0);
19361 if (BINARY_P (src
))
19362 op1
= XEXP (src
, 1);
19364 if (low_register_operand (dst
, SImode
))
19366 switch (GET_CODE (src
))
19369 /* Adding two registers and storing the result
19370 in the first source is already a 16-bit
19372 if (rtx_equal_p (dst
, op0
)
19373 && register_operand (op1
, SImode
))
19376 if (low_register_operand (op0
, SImode
))
19378 /* ADDS <Rd>,<Rn>,<Rm> */
19379 if (low_register_operand (op1
, SImode
))
19381 /* ADDS <Rdn>,#<imm8> */
19382 /* SUBS <Rdn>,#<imm8> */
19383 else if (rtx_equal_p (dst
, op0
)
19384 && CONST_INT_P (op1
)
19385 && IN_RANGE (INTVAL (op1
), -255, 255))
19387 /* ADDS <Rd>,<Rn>,#<imm3> */
19388 /* SUBS <Rd>,<Rn>,#<imm3> */
19389 else if (CONST_INT_P (op1
)
19390 && IN_RANGE (INTVAL (op1
), -7, 7))
19393 /* ADCS <Rd>, <Rn> */
19394 else if (GET_CODE (XEXP (src
, 0)) == PLUS
19395 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
19396 && low_register_operand (XEXP (XEXP (src
, 0), 1),
19398 && COMPARISON_P (op1
)
19399 && cc_register (XEXP (op1
, 0), VOIDmode
)
19400 && maybe_get_arm_condition_code (op1
) == ARM_CS
19401 && XEXP (op1
, 1) == const0_rtx
)
19406 /* RSBS <Rd>,<Rn>,#0
19407 Not handled here: see NEG below. */
19408 /* SUBS <Rd>,<Rn>,#<imm3>
19410 Not handled here: see PLUS above. */
19411 /* SUBS <Rd>,<Rn>,<Rm> */
19412 if (low_register_operand (op0
, SImode
)
19413 && low_register_operand (op1
, SImode
))
19418 /* MULS <Rdm>,<Rn>,<Rdm>
19419 As an exception to the rule, this is only used
19420 when optimizing for size since MULS is slow on all
19421 known implementations. We do not even want to use
19422 MULS in cold code, if optimizing for speed, so we
19423 test the global flag here. */
19424 if (!optimize_size
)
19426 /* Fall through. */
19430 /* ANDS <Rdn>,<Rm> */
19431 if (rtx_equal_p (dst
, op0
)
19432 && low_register_operand (op1
, SImode
))
19433 action
= action_for_partial_flag_setting
;
19434 else if (rtx_equal_p (dst
, op1
)
19435 && low_register_operand (op0
, SImode
))
19436 action
= action_for_partial_flag_setting
== SKIP
19437 ? SKIP
: SWAP_CONV
;
19443 /* ASRS <Rdn>,<Rm> */
19444 /* LSRS <Rdn>,<Rm> */
19445 /* LSLS <Rdn>,<Rm> */
19446 if (rtx_equal_p (dst
, op0
)
19447 && low_register_operand (op1
, SImode
))
19448 action
= action_for_partial_flag_setting
;
19449 /* ASRS <Rd>,<Rm>,#<imm5> */
19450 /* LSRS <Rd>,<Rm>,#<imm5> */
19451 /* LSLS <Rd>,<Rm>,#<imm5> */
19452 else if (low_register_operand (op0
, SImode
)
19453 && CONST_INT_P (op1
)
19454 && IN_RANGE (INTVAL (op1
), 0, 31))
19455 action
= action_for_partial_flag_setting
;
19459 /* RORS <Rdn>,<Rm> */
19460 if (rtx_equal_p (dst
, op0
)
19461 && low_register_operand (op1
, SImode
))
19462 action
= action_for_partial_flag_setting
;
19466 /* MVNS <Rd>,<Rm> */
19467 if (low_register_operand (op0
, SImode
))
19468 action
= action_for_partial_flag_setting
;
19472 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19473 if (low_register_operand (op0
, SImode
))
19478 /* MOVS <Rd>,#<imm8> */
19479 if (CONST_INT_P (src
)
19480 && IN_RANGE (INTVAL (src
), 0, 255))
19481 action
= action_for_partial_flag_setting
;
19485 /* MOVS and MOV<c> with registers have different
19486 encodings, so are not relevant here. */
19494 if (action
!= SKIP
)
19496 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
19497 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
19500 if (action
== SWAP_CONV
)
19502 src
= copy_rtx (src
);
19503 XEXP (src
, 0) = op1
;
19504 XEXP (src
, 1) = op0
;
19505 pat
= gen_rtx_SET (dst
, src
);
19506 vec
= gen_rtvec (2, pat
, clobber
);
19508 else /* action == CONV */
19509 vec
= gen_rtvec (2, pat
, clobber
);
19511 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
19512 INSN_CODE (insn
) = -1;
19516 if (NONDEBUG_INSN_P (insn
))
19517 df_simulate_one_insn_backwards (bb
, insn
, &live
);
19521 CLEAR_REG_SET (&live
);
19524 /* Gcc puts the pool in the wrong place for ARM, since we can only
19525 load addresses a limited distance around the pc. We do some
19526 special munging to move the constant pool values to the correct
19527 point in the code. */
19532 HOST_WIDE_INT address
= 0;
19536 cmse_nonsecure_call_inline_register_clear ();
19538 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19539 if (cfun
->is_thunk
)
19541 else if (TARGET_THUMB1
)
19543 else if (TARGET_THUMB2
)
19546 /* Ensure all insns that must be split have been split at this point.
19547 Otherwise, the pool placement code below may compute incorrect
19548 insn lengths. Note that when optimizing, all insns have already
19549 been split at this point. */
19551 split_all_insns_noflow ();
19553 /* Make sure we do not attempt to create a literal pool even though it should
19554 no longer be necessary to create any. */
19555 if (arm_disable_literal_pool
)
19558 minipool_fix_head
= minipool_fix_tail
= NULL
;
19560 /* The first insn must always be a note, or the code below won't
19561 scan it properly. */
19562 insn
= get_insns ();
19563 gcc_assert (NOTE_P (insn
));
19566 /* Scan all the insns and record the operands that will need fixing. */
19567 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
19569 if (BARRIER_P (insn
))
19570 push_minipool_barrier (insn
, address
);
19571 else if (INSN_P (insn
))
19573 rtx_jump_table_data
*table
;
19575 note_invalid_constants (insn
, address
, true);
19576 address
+= get_attr_length (insn
);
19578 /* If the insn is a vector jump, add the size of the table
19579 and skip the table. */
19580 if (tablejump_p (insn
, NULL
, &table
))
19582 address
+= get_jump_table_size (table
);
19586 else if (LABEL_P (insn
))
19587 /* Add the worst-case padding due to alignment. We don't add
19588 the _current_ padding because the minipool insertions
19589 themselves might change it. */
19590 address
+= get_label_padding (insn
);
19593 fix
= minipool_fix_head
;
19595 /* Now scan the fixups and perform the required changes. */
19600 Mfix
* last_added_fix
;
19601 Mfix
* last_barrier
= NULL
;
19604 /* Skip any further barriers before the next fix. */
19605 while (fix
&& BARRIER_P (fix
->insn
))
19608 /* No more fixes. */
19612 last_added_fix
= NULL
;
19614 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
19616 if (BARRIER_P (ftmp
->insn
))
19618 if (ftmp
->address
>= minipool_vector_head
->max_address
)
19621 last_barrier
= ftmp
;
19623 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
19626 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
19629 /* If we found a barrier, drop back to that; any fixes that we
19630 could have reached but come after the barrier will now go in
19631 the next mini-pool. */
19632 if (last_barrier
!= NULL
)
19634 /* Reduce the refcount for those fixes that won't go into this
19636 for (fdel
= last_barrier
->next
;
19637 fdel
&& fdel
!= ftmp
;
19640 fdel
->minipool
->refcount
--;
19641 fdel
->minipool
= NULL
;
19644 ftmp
= last_barrier
;
19648 /* ftmp is first fix that we can't fit into this pool and
19649 there no natural barriers that we could use. Insert a
19650 new barrier in the code somewhere between the previous
19651 fix and this one, and arrange to jump around it. */
19652 HOST_WIDE_INT max_address
;
19654 /* The last item on the list of fixes must be a barrier, so
19655 we can never run off the end of the list of fixes without
19656 last_barrier being set. */
19659 max_address
= minipool_vector_head
->max_address
;
19660 /* Check that there isn't another fix that is in range that
19661 we couldn't fit into this pool because the pool was
19662 already too large: we need to put the pool before such an
19663 instruction. The pool itself may come just after the
19664 fix because create_fix_barrier also allows space for a
19665 jump instruction. */
19666 if (ftmp
->address
< max_address
)
19667 max_address
= ftmp
->address
+ 1;
19669 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
19672 assign_minipool_offsets (last_barrier
);
19676 if (!BARRIER_P (ftmp
->insn
)
19677 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
19684 /* Scan over the fixes we have identified for this pool, fixing them
19685 up and adding the constants to the pool itself. */
19686 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
19687 this_fix
= this_fix
->next
)
19688 if (!BARRIER_P (this_fix
->insn
))
19691 = plus_constant (Pmode
,
19692 gen_rtx_LABEL_REF (VOIDmode
,
19693 minipool_vector_label
),
19694 this_fix
->minipool
->offset
);
19695 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
19698 dump_minipool (last_barrier
->insn
);
19702 /* From now on we must synthesize any constants that we can't handle
19703 directly. This can happen if the RTL gets split during final
19704 instruction generation. */
19705 cfun
->machine
->after_arm_reorg
= 1;
19707 /* Free the minipool memory. */
19708 obstack_free (&minipool_obstack
, minipool_startobj
);
19711 /* Routines to output assembly language. */
19713 /* Return string representation of passed in real value. */
19714 static const char *
19715 fp_const_from_val (REAL_VALUE_TYPE
*r
)
19717 if (!fp_consts_inited
)
19720 gcc_assert (real_equal (r
, &value_fp0
));
19724 /* OPERANDS[0] is the entire list of insns that constitute pop,
19725 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19726 is in the list, UPDATE is true iff the list contains explicit
19727 update of base register. */
19729 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
19735 const char *conditional
;
19736 int num_saves
= XVECLEN (operands
[0], 0);
19737 unsigned int regno
;
19738 unsigned int regno_base
= REGNO (operands
[1]);
19739 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
19742 offset
+= update
? 1 : 0;
19743 offset
+= return_pc
? 1 : 0;
19745 /* Is the base register in the list? */
19746 for (i
= offset
; i
< num_saves
; i
++)
19748 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
19749 /* If SP is in the list, then the base register must be SP. */
19750 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
19751 /* If base register is in the list, there must be no explicit update. */
19752 if (regno
== regno_base
)
19753 gcc_assert (!update
);
19756 conditional
= reverse
? "%?%D0" : "%?%d0";
19757 /* Can't use POP if returning from an interrupt. */
19758 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
19759 sprintf (pattern
, "pop%s\t{", conditional
);
19762 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19763 It's just a convention, their semantics are identical. */
19764 if (regno_base
== SP_REGNUM
)
19765 sprintf (pattern
, "ldmfd%s\t", conditional
);
19767 sprintf (pattern
, "ldmia%s\t", conditional
);
19769 sprintf (pattern
, "ldm%s\t", conditional
);
19771 strcat (pattern
, reg_names
[regno_base
]);
19773 strcat (pattern
, "!, {");
19775 strcat (pattern
, ", {");
19778 /* Output the first destination register. */
19780 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
19782 /* Output the rest of the destination registers. */
19783 for (i
= offset
+ 1; i
< num_saves
; i
++)
19785 strcat (pattern
, ", ");
19787 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
19790 strcat (pattern
, "}");
19792 if (interrupt_p
&& return_pc
)
19793 strcat (pattern
, "^");
19795 output_asm_insn (pattern
, &cond
);
19799 /* Output the assembly for a store multiple. */
19802 vfp_output_vstmd (rtx
* operands
)
19808 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
19809 ? XEXP (operands
[0], 0)
19810 : XEXP (XEXP (operands
[0], 0), 0);
19811 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
19814 strcpy (pattern
, "vpush%?.64\t{%P1");
19816 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
19818 p
= strlen (pattern
);
19820 gcc_assert (REG_P (operands
[1]));
19822 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
19823 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
19825 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
19827 strcpy (&pattern
[p
], "}");
19829 output_asm_insn (pattern
, operands
);
19834 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19835 number of bytes pushed. */
19838 vfp_emit_fstmd (int base_reg
, int count
)
19845 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19846 register pairs are stored by a store multiple insn. We avoid this
19847 by pushing an extra pair. */
19848 if (count
== 2 && !arm_arch6
)
19850 if (base_reg
== LAST_VFP_REGNUM
- 3)
19855 /* FSTMD may not store more than 16 doubleword registers at once. Split
19856 larger stores into multiple parts (up to a maximum of two, in
19861 /* NOTE: base_reg is an internal register number, so each D register
19863 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
19864 saved
+= vfp_emit_fstmd (base_reg
, 16);
19868 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
19869 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
19871 reg
= gen_rtx_REG (DFmode
, base_reg
);
19874 XVECEXP (par
, 0, 0)
19875 = gen_rtx_SET (gen_frame_mem
19877 gen_rtx_PRE_MODIFY (Pmode
,
19880 (Pmode
, stack_pointer_rtx
,
19883 gen_rtx_UNSPEC (BLKmode
,
19884 gen_rtvec (1, reg
),
19885 UNSPEC_PUSH_MULT
));
19887 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19888 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
19889 RTX_FRAME_RELATED_P (tmp
) = 1;
19890 XVECEXP (dwarf
, 0, 0) = tmp
;
19892 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
19893 RTX_FRAME_RELATED_P (tmp
) = 1;
19894 XVECEXP (dwarf
, 0, 1) = tmp
;
19896 for (i
= 1; i
< count
; i
++)
19898 reg
= gen_rtx_REG (DFmode
, base_reg
);
19900 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
19902 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
19903 plus_constant (Pmode
,
19907 RTX_FRAME_RELATED_P (tmp
) = 1;
19908 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
19911 par
= emit_insn (par
);
19912 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19913 RTX_FRAME_RELATED_P (par
) = 1;
19918 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
19919 has the cmse_nonsecure_call attribute and returns false otherwise. */
19922 detect_cmse_nonsecure_call (tree addr
)
19927 tree fntype
= TREE_TYPE (addr
);
19928 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
19929 TYPE_ATTRIBUTES (fntype
)))
19935 /* Emit a call instruction with pattern PAT. ADDR is the address of
19936 the call target. */
19939 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
19943 insn
= emit_call_insn (pat
);
19945 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19946 If the call might use such an entry, add a use of the PIC register
19947 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19948 if (TARGET_VXWORKS_RTP
19951 && SYMBOL_REF_P (addr
)
19952 && (SYMBOL_REF_DECL (addr
)
19953 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
19954 : !SYMBOL_REF_LOCAL_P (addr
)))
19956 require_pic_register (NULL_RTX
, false /*compute_now*/);
19957 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
19962 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
19963 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), fdpic_reg
);
19966 if (TARGET_AAPCS_BASED
)
19968 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19969 linker. We need to add an IP clobber to allow setting
19970 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19971 is not needed since it's a fixed register. */
19972 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
19973 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
19977 /* Output a 'call' insn. */
19979 output_call (rtx
*operands
)
19981 gcc_assert (!arm_arch5t
); /* Patterns should call blx <reg> directly. */
19983 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19984 if (REGNO (operands
[0]) == LR_REGNUM
)
19986 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
19987 output_asm_insn ("mov%?\t%0, %|lr", operands
);
19990 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
19992 if (TARGET_INTERWORK
|| arm_arch4t
)
19993 output_asm_insn ("bx%?\t%0", operands
);
19995 output_asm_insn ("mov%?\t%|pc, %0", operands
);
20000 /* Output a move from arm registers to arm registers of a long double
20001 OPERANDS[0] is the destination.
20002 OPERANDS[1] is the source. */
20004 output_mov_long_double_arm_from_arm (rtx
*operands
)
20006 /* We have to be careful here because the two might overlap. */
20007 int dest_start
= REGNO (operands
[0]);
20008 int src_start
= REGNO (operands
[1]);
20012 if (dest_start
< src_start
)
20014 for (i
= 0; i
< 3; i
++)
20016 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
20017 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
20018 output_asm_insn ("mov%?\t%0, %1", ops
);
20023 for (i
= 2; i
>= 0; i
--)
20025 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
20026 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
20027 output_asm_insn ("mov%?\t%0, %1", ops
);
20035 arm_emit_movpair (rtx dest
, rtx src
)
20037 /* If the src is an immediate, simplify it. */
20038 if (CONST_INT_P (src
))
20040 HOST_WIDE_INT val
= INTVAL (src
);
20041 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
20042 if ((val
>> 16) & 0x0000ffff)
20044 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
20046 GEN_INT ((val
>> 16) & 0x0000ffff));
20047 rtx_insn
*insn
= get_last_insn ();
20048 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
20052 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
20053 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
20054 rtx_insn
*insn
= get_last_insn ();
20055 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
20058 /* Output a move between double words. It must be REG<-MEM
20061 output_move_double (rtx
*operands
, bool emit
, int *count
)
20063 enum rtx_code code0
= GET_CODE (operands
[0]);
20064 enum rtx_code code1
= GET_CODE (operands
[1]);
20069 /* The only case when this might happen is when
20070 you are looking at the length of a DImode instruction
20071 that has an invalid constant in it. */
20072 if (code0
== REG
&& code1
!= MEM
)
20074 gcc_assert (!emit
);
20081 unsigned int reg0
= REGNO (operands
[0]);
20082 const bool can_ldrd
= TARGET_LDRD
&& (TARGET_THUMB2
|| (reg0
% 2 == 0));
20084 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
20086 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
20088 switch (GET_CODE (XEXP (operands
[1], 0)))
20095 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
20096 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
20098 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20103 gcc_assert (can_ldrd
);
20105 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
20112 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
20114 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
20122 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
20124 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
20129 gcc_assert (can_ldrd
);
20131 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
20136 /* Autoincrement addressing modes should never have overlapping
20137 base and destination registers, and overlapping index registers
20138 are already prohibited, so this doesn't need to worry about
20140 otherops
[0] = operands
[0];
20141 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
20142 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
20144 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
20146 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
20148 /* Registers overlap so split out the increment. */
20151 gcc_assert (can_ldrd
);
20152 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
20153 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
20160 /* Use a single insn if we can.
20161 FIXME: IWMMXT allows offsets larger than ldrd can
20162 handle, fix these up with a pair of ldr. */
20165 || !CONST_INT_P (otherops
[2])
20166 || (INTVAL (otherops
[2]) > -256
20167 && INTVAL (otherops
[2]) < 256)))
20170 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
20176 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
20177 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20187 /* Use a single insn if we can.
20188 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20189 fix these up with a pair of ldr. */
20192 || !CONST_INT_P (otherops
[2])
20193 || (INTVAL (otherops
[2]) > -256
20194 && INTVAL (otherops
[2]) < 256)))
20197 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
20203 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20204 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
20214 /* We might be able to use ldrd %0, %1 here. However the range is
20215 different to ldr/adr, and it is broken on some ARMv7-M
20216 implementations. */
20217 /* Use the second register of the pair to avoid problematic
20219 otherops
[1] = operands
[1];
20221 output_asm_insn ("adr%?\t%0, %1", otherops
);
20222 operands
[1] = otherops
[0];
20226 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20228 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
20235 /* ??? This needs checking for thumb2. */
20237 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
20238 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
20240 otherops
[0] = operands
[0];
20241 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
20242 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
20244 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
20246 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20248 switch ((int) INTVAL (otherops
[2]))
20252 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
20258 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
20264 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
20268 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
20269 operands
[1] = otherops
[0];
20271 && (REG_P (otherops
[2])
20273 || (CONST_INT_P (otherops
[2])
20274 && INTVAL (otherops
[2]) > -256
20275 && INTVAL (otherops
[2]) < 256)))
20277 if (reg_overlap_mentioned_p (operands
[0],
20280 /* Swap base and index registers over to
20281 avoid a conflict. */
20282 std::swap (otherops
[1], otherops
[2]);
20284 /* If both registers conflict, it will usually
20285 have been fixed by a splitter. */
20286 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
20287 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
20291 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20292 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20299 otherops
[0] = operands
[0];
20301 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
20306 if (CONST_INT_P (otherops
[2]))
20310 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
20311 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
20313 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20319 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20325 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
20332 return "ldrd%?\t%0, [%1]";
20334 return "ldmia%?\t%1, %M0";
20338 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
20339 /* Take care of overlapping base/data reg. */
20340 if (reg_mentioned_p (operands
[0], operands
[1]))
20344 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20345 output_asm_insn ("ldr%?\t%0, %1", operands
);
20355 output_asm_insn ("ldr%?\t%0, %1", operands
);
20356 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20366 /* Constraints should ensure this. */
20367 gcc_assert (code0
== MEM
&& code1
== REG
);
20368 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
20369 || (TARGET_ARM
&& TARGET_LDRD
));
20371 /* For TARGET_ARM the first source register of an STRD
20372 must be even. This is usually the case for double-word
20373 values but user assembly constraints can force an odd
20374 starting register. */
20375 bool allow_strd
= TARGET_LDRD
20376 && !(TARGET_ARM
&& (REGNO (operands
[1]) & 1) == 1);
20377 switch (GET_CODE (XEXP (operands
[0], 0)))
20383 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
20385 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20390 gcc_assert (allow_strd
);
20392 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
20399 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
20401 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
20409 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
20411 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
20416 gcc_assert (allow_strd
);
20418 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
20423 otherops
[0] = operands
[1];
20424 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
20425 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
20427 /* IWMMXT allows offsets larger than strd can handle,
20428 fix these up with a pair of str. */
20430 && CONST_INT_P (otherops
[2])
20431 && (INTVAL(otherops
[2]) <= -256
20432 || INTVAL(otherops
[2]) >= 256))
20434 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20438 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
20439 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20448 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20449 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
20455 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20458 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
20463 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
20468 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
20469 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20471 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
20475 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
20482 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
20489 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
20494 && (REG_P (otherops
[2])
20496 || (CONST_INT_P (otherops
[2])
20497 && INTVAL (otherops
[2]) > -256
20498 && INTVAL (otherops
[2]) < 256)))
20500 otherops
[0] = operands
[1];
20501 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
20503 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
20509 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
20510 otherops
[1] = operands
[1];
20513 output_asm_insn ("str%?\t%1, %0", operands
);
20514 output_asm_insn ("str%?\t%H1, %0", otherops
);
20524 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20525 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20528 output_move_quad (rtx
*operands
)
20530 if (REG_P (operands
[0]))
20532 /* Load, or reg->reg move. */
20534 if (MEM_P (operands
[1]))
20536 switch (GET_CODE (XEXP (operands
[1], 0)))
20539 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20544 output_asm_insn ("adr%?\t%0, %1", operands
);
20545 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
20549 gcc_unreachable ();
20557 gcc_assert (REG_P (operands
[1]));
20559 dest
= REGNO (operands
[0]);
20560 src
= REGNO (operands
[1]);
20562 /* This seems pretty dumb, but hopefully GCC won't try to do it
20565 for (i
= 0; i
< 4; i
++)
20567 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20568 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20569 output_asm_insn ("mov%?\t%0, %1", ops
);
20572 for (i
= 3; i
>= 0; i
--)
20574 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20575 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20576 output_asm_insn ("mov%?\t%0, %1", ops
);
20582 gcc_assert (MEM_P (operands
[0]));
20583 gcc_assert (REG_P (operands
[1]));
20584 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
20586 switch (GET_CODE (XEXP (operands
[0], 0)))
20589 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20593 gcc_unreachable ();
20600 /* Output a VFP load or store instruction. */
20603 output_move_vfp (rtx
*operands
)
20605 rtx reg
, mem
, addr
, ops
[2];
20606 int load
= REG_P (operands
[0]);
20607 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
20608 int sp
= (!TARGET_VFP_FP16INST
20609 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
20610 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
20615 reg
= operands
[!load
];
20616 mem
= operands
[load
];
20618 mode
= GET_MODE (reg
);
20620 gcc_assert (REG_P (reg
));
20621 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
20622 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
20628 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
20629 gcc_assert (MEM_P (mem
));
20631 addr
= XEXP (mem
, 0);
20633 switch (GET_CODE (addr
))
20636 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20637 ops
[0] = XEXP (addr
, 0);
20642 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20643 ops
[0] = XEXP (addr
, 0);
20648 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
20654 sprintf (buff
, templ
,
20655 load
? "ld" : "st",
20656 dp
? "64" : sp
? "32" : "16",
20658 integer_p
? "\t%@ int" : "");
20659 output_asm_insn (buff
, ops
);
20664 /* Output a Neon double-word or quad-word load or store, or a load
20665 or store for larger structure modes.
20667 WARNING: The ordering of elements is weird in big-endian mode,
20668 because the EABI requires that vectors stored in memory appear
20669 as though they were stored by a VSTM, as required by the EABI.
20670 GCC RTL defines element ordering based on in-memory order.
20671 This can be different from the architectural ordering of elements
20672 within a NEON register. The intrinsics defined in arm_neon.h use the
20673 NEON register element ordering, not the GCC RTL element ordering.
20675 For example, the in-memory ordering of a big-endian quadword
20676 vector with 16-bit elements when stored from register pair {d0,d1}
20677 will be (lowest address first, d0[N] is NEON register element N):
20679 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20681 When necessary, quadword registers (dN, dN+1) are moved to ARM
20682 registers from rN in the order:
20684 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20686 So that STM/LDM can be used on vectors in ARM registers, and the
20687 same memory layout will result as if VSTM/VLDM were used.
20689 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20690 possible, which allows use of appropriate alignment tags.
20691 Note that the choice of "64" is independent of the actual vector
20692 element size; this size simply ensures that the behavior is
20693 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20695 Due to limitations of those instructions, use of VST1.64/VLD1.64
20696 is not possible if:
20697 - the address contains PRE_DEC, or
20698 - the mode refers to more than 4 double-word registers
20700 In those cases, it would be possible to replace VSTM/VLDM by a
20701 sequence of instructions; this is not currently implemented since
20702 this is not certain to actually improve performance. */
20705 output_move_neon (rtx
*operands
)
20707 rtx reg
, mem
, addr
, ops
[2];
20708 int regno
, nregs
, load
= REG_P (operands
[0]);
20713 reg
= operands
[!load
];
20714 mem
= operands
[load
];
20716 mode
= GET_MODE (reg
);
20718 gcc_assert (REG_P (reg
));
20719 regno
= REGNO (reg
);
20720 nregs
= REG_NREGS (reg
) / 2;
20721 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
20722 || NEON_REGNO_OK_FOR_QUAD (regno
));
20723 gcc_assert (VALID_NEON_DREG_MODE (mode
)
20724 || VALID_NEON_QREG_MODE (mode
)
20725 || VALID_NEON_STRUCT_MODE (mode
));
20726 gcc_assert (MEM_P (mem
));
20728 addr
= XEXP (mem
, 0);
20730 /* Strip off const from addresses like (const (plus (...))). */
20731 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20732 addr
= XEXP (addr
, 0);
20734 switch (GET_CODE (addr
))
20737 /* We have to use vldm / vstm for too-large modes. */
20738 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20740 templ
= "v%smia%%?\t%%0!, %%h1";
20741 ops
[0] = XEXP (addr
, 0);
20745 templ
= "v%s1.64\t%%h1, %%A0";
20752 /* We have to use vldm / vstm in this case, since there is no
20753 pre-decrement form of the vld1 / vst1 instructions. */
20754 templ
= "v%smdb%%?\t%%0!, %%h1";
20755 ops
[0] = XEXP (addr
, 0);
20760 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20761 gcc_unreachable ();
20764 /* We have to use vldm / vstm for too-large modes. */
20767 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20768 templ
= "v%smia%%?\t%%m0, %%h1";
20770 templ
= "v%s1.64\t%%h1, %%A0";
20776 /* Fall through. */
20778 if (GET_CODE (addr
) == PLUS
)
20779 addr
= XEXP (addr
, 0);
20780 /* Fall through. */
20785 for (i
= 0; i
< nregs
; i
++)
20787 /* We're only using DImode here because it's a convenient
20789 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
20790 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
20791 if (reg_overlap_mentioned_p (ops
[0], mem
))
20793 gcc_assert (overlap
== -1);
20798 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20799 sprintf (buff
, "v%sr.64\t%%P0, %%1", load
? "ld" : "st");
20801 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20802 output_asm_insn (buff
, ops
);
20807 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
20808 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
20809 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20810 sprintf (buff
, "v%sr.32\t%%P0, %%1", load
? "ld" : "st");
20812 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20813 output_asm_insn (buff
, ops
);
20820 gcc_unreachable ();
20823 sprintf (buff
, templ
, load
? "ld" : "st");
20824 output_asm_insn (buff
, ops
);
20829 /* Compute and return the length of neon_mov<mode>, where <mode> is
20830 one of VSTRUCT modes: EI, OI, CI or XI. */
20832 arm_attr_length_move_neon (rtx_insn
*insn
)
20834 rtx reg
, mem
, addr
;
20838 extract_insn_cached (insn
);
20840 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
20842 mode
= GET_MODE (recog_data
.operand
[0]);
20853 gcc_unreachable ();
20857 load
= REG_P (recog_data
.operand
[0]);
20858 reg
= recog_data
.operand
[!load
];
20859 mem
= recog_data
.operand
[load
];
20861 gcc_assert (MEM_P (mem
));
20863 addr
= XEXP (mem
, 0);
20865 /* Strip off const from addresses like (const (plus (...))). */
20866 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20867 addr
= XEXP (addr
, 0);
20869 if (LABEL_REF_P (addr
) || GET_CODE (addr
) == PLUS
)
20871 int insns
= REG_NREGS (reg
) / 2;
20878 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20882 arm_address_offset_is_imm (rtx_insn
*insn
)
20886 extract_insn_cached (insn
);
20888 if (REG_P (recog_data
.operand
[0]))
20891 mem
= recog_data
.operand
[0];
20893 gcc_assert (MEM_P (mem
));
20895 addr
= XEXP (mem
, 0);
20898 || (GET_CODE (addr
) == PLUS
20899 && REG_P (XEXP (addr
, 0))
20900 && CONST_INT_P (XEXP (addr
, 1))))
20906 /* Output an ADD r, s, #n where n may be too big for one instruction.
20907 If adding zero to one register, output nothing. */
20909 output_add_immediate (rtx
*operands
)
20911 HOST_WIDE_INT n
= INTVAL (operands
[2]);
20913 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
20916 output_multi_immediate (operands
,
20917 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20920 output_multi_immediate (operands
,
20921 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20928 /* Output a multiple immediate operation.
20929 OPERANDS is the vector of operands referred to in the output patterns.
20930 INSTR1 is the output pattern to use for the first constant.
20931 INSTR2 is the output pattern to use for subsequent constants.
20932 IMMED_OP is the index of the constant slot in OPERANDS.
20933 N is the constant value. */
20934 static const char *
20935 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
20936 int immed_op
, HOST_WIDE_INT n
)
20938 #if HOST_BITS_PER_WIDE_INT > 32
20944 /* Quick and easy output. */
20945 operands
[immed_op
] = const0_rtx
;
20946 output_asm_insn (instr1
, operands
);
20951 const char * instr
= instr1
;
20953 /* Note that n is never zero here (which would give no output). */
20954 for (i
= 0; i
< 32; i
+= 2)
20958 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
20959 output_asm_insn (instr
, operands
);
20969 /* Return the name of a shifter operation. */
20970 static const char *
20971 arm_shift_nmem(enum rtx_code code
)
20976 return ARM_LSL_NAME
;
20992 /* Return the appropriate ARM instruction for the operation code.
20993 The returned result should not be overwritten. OP is the rtx of the
20994 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20997 arithmetic_instr (rtx op
, int shift_first_arg
)
20999 switch (GET_CODE (op
))
21005 return shift_first_arg
? "rsb" : "sub";
21020 return arm_shift_nmem(GET_CODE(op
));
21023 gcc_unreachable ();
21027 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21028 for the operation code. The returned result should not be overwritten.
21029 OP is the rtx code of the shift.
21030 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
21032 static const char *
21033 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
21036 enum rtx_code code
= GET_CODE (op
);
21041 if (!CONST_INT_P (XEXP (op
, 1)))
21043 output_operand_lossage ("invalid shift operand");
21048 *amountp
= 32 - INTVAL (XEXP (op
, 1));
21056 mnem
= arm_shift_nmem(code
);
21057 if (CONST_INT_P (XEXP (op
, 1)))
21059 *amountp
= INTVAL (XEXP (op
, 1));
21061 else if (REG_P (XEXP (op
, 1)))
21068 output_operand_lossage ("invalid shift operand");
21074 /* We never have to worry about the amount being other than a
21075 power of 2, since this case can never be reloaded from a reg. */
21076 if (!CONST_INT_P (XEXP (op
, 1)))
21078 output_operand_lossage ("invalid shift operand");
21082 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
21084 /* Amount must be a power of two. */
21085 if (*amountp
& (*amountp
- 1))
21087 output_operand_lossage ("invalid shift operand");
21091 *amountp
= exact_log2 (*amountp
);
21092 gcc_assert (IN_RANGE (*amountp
, 0, 31));
21093 return ARM_LSL_NAME
;
21096 output_operand_lossage ("invalid shift operand");
21100 /* This is not 100% correct, but follows from the desire to merge
21101 multiplication by a power of 2 with the recognizer for a
21102 shift. >=32 is not a valid shift for "lsl", so we must try and
21103 output a shift that produces the correct arithmetical result.
21104 Using lsr #32 is identical except for the fact that the carry bit
21105 is not set correctly if we set the flags; but we never use the
21106 carry bit from such an operation, so we can ignore that. */
21107 if (code
== ROTATERT
)
21108 /* Rotate is just modulo 32. */
21110 else if (*amountp
!= (*amountp
& 31))
21112 if (code
== ASHIFT
)
21117 /* Shifts of 0 are no-ops. */
21124 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21125 because /bin/as is horribly restrictive. The judgement about
21126 whether or not each character is 'printable' (and can be output as
21127 is) or not (and must be printed with an octal escape) must be made
21128 with reference to the *host* character set -- the situation is
21129 similar to that discussed in the comments above pp_c_char in
21130 c-pretty-print.cc. */
21132 #define MAX_ASCII_LEN 51
21135 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
21138 int len_so_far
= 0;
21140 fputs ("\t.ascii\t\"", stream
);
21142 for (i
= 0; i
< len
; i
++)
21146 if (len_so_far
>= MAX_ASCII_LEN
)
21148 fputs ("\"\n\t.ascii\t\"", stream
);
21154 if (c
== '\\' || c
== '\"')
21156 putc ('\\', stream
);
21164 fprintf (stream
, "\\%03o", c
);
21169 fputs ("\"\n", stream
);
21173 /* Compute the register save mask for registers 0 through 12
21174 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21176 static unsigned long
21177 arm_compute_save_reg0_reg12_mask (void)
21179 unsigned long func_type
= arm_current_func_type ();
21180 unsigned long save_reg_mask
= 0;
21183 if (IS_INTERRUPT (func_type
))
21185 unsigned int max_reg
;
21186 /* Interrupt functions must not corrupt any registers,
21187 even call clobbered ones. If this is a leaf function
21188 we can just examine the registers used by the RTL, but
21189 otherwise we have to assume that whatever function is
21190 called might clobber anything, and so we have to save
21191 all the call-clobbered registers as well. */
21192 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
21193 /* FIQ handlers have registers r8 - r12 banked, so
21194 we only need to check r0 - r7, Normal ISRs only
21195 bank r14 and r15, so we must check up to r12.
21196 r13 is the stack pointer which is always preserved,
21197 so we do not need to consider it here. */
21202 for (reg
= 0; reg
<= max_reg
; reg
++)
21203 if (reg_needs_saving_p (reg
))
21204 save_reg_mask
|= (1 << reg
);
21206 /* Also save the pic base register if necessary. */
21207 if (PIC_REGISTER_MAY_NEED_SAVING
21208 && crtl
->uses_pic_offset_table
)
21209 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21211 else if (IS_VOLATILE(func_type
))
21213 /* For noreturn functions we historically omitted register saves
21214 altogether. However this really messes up debugging. As a
21215 compromise save just the frame pointers. Combined with the link
21216 register saved elsewhere this should be sufficient to get
21218 if (frame_pointer_needed
)
21219 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
21220 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
21221 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
21222 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
21223 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
21227 /* In the normal case we only need to save those registers
21228 which are call saved and which are used by this function. */
21229 for (reg
= 0; reg
<= 11; reg
++)
21230 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
21231 save_reg_mask
|= (1 << reg
);
21233 /* Handle the frame pointer as a special case. */
21234 if (frame_pointer_needed
)
21235 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
21237 /* If we aren't loading the PIC register,
21238 don't stack it even though it may be live. */
21239 if (PIC_REGISTER_MAY_NEED_SAVING
21240 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
21241 || crtl
->uses_pic_offset_table
))
21242 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21244 /* The prologue will copy SP into R0, so save it. */
21245 if (IS_STACKALIGN (func_type
))
21246 save_reg_mask
|= 1;
21249 /* Save registers so the exception handler can modify them. */
21250 if (crtl
->calls_eh_return
)
21256 reg
= EH_RETURN_DATA_REGNO (i
);
21257 if (reg
== INVALID_REGNUM
)
21259 save_reg_mask
|= 1 << reg
;
21263 return save_reg_mask
;
21266 /* Return true if r3 is live at the start of the function. */
21269 arm_r3_live_at_start_p (void)
21271 /* Just look at cfg info, which is still close enough to correct at this
21272 point. This gives false positives for broken functions that might use
21273 uninitialized data that happens to be allocated in r3, but who cares? */
21274 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
21277 /* Compute the number of bytes used to store the static chain register on the
21278 stack, above the stack frame. We need to know this accurately to get the
21279 alignment of the rest of the stack frame correct. */
21282 arm_compute_static_chain_stack_bytes (void)
21284 /* Once the value is updated from the init value of -1, do not
21286 if (cfun
->machine
->static_chain_stack_bytes
!= -1)
21287 return cfun
->machine
->static_chain_stack_bytes
;
21289 /* See the defining assertion in arm_expand_prologue. */
21290 if (IS_NESTED (arm_current_func_type ())
21291 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21292 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21293 || flag_stack_clash_protection
)
21294 && !df_regs_ever_live_p (LR_REGNUM
)))
21295 && arm_r3_live_at_start_p ()
21296 && crtl
->args
.pretend_args_size
== 0)
/* NOTE(review): damaged extraction -- the original file's line numbers are
   fused into the text and several statements/braces are missing (gaps in the
   embedded numbering).  Code kept byte-identical; comments only added.
   Purpose (from the surviving comments): build the bit mask of core
   registers that must be saved in the prologue of the current function.  */
21302 /* Compute a bit mask of which core registers need to be
21303 saved on the stack for the current function.
21304 This is used by arm_compute_frame_layout, which may add extra registers. */
21306 static unsigned long
21307 arm_compute_save_core_reg_mask (void)
21309 unsigned int save_reg_mask
= 0;
21310 unsigned long func_type
= arm_current_func_type ();
/* Naked functions manage their own prologue/epilogue: save nothing.  */
21313 if (IS_NAKED (func_type
))
21314 /* This should never really happen. */
21317 /* If we are creating a stack frame, then we must save the frame pointer,
21318 IP (which will hold the old stack pointer), LR and the PC. */
21319 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21321 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
21324 | (1 << PC_REGNUM
);
21326 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
/* With return-address signing (PAC), IP holds the authentication code
   and must be preserved.  */
21328 if (arm_current_function_pac_enabled_p ())
21329 save_reg_mask
|= 1 << IP_REGNUM
;
21331 /* Decide if we need to save the link register.
21332 Interrupt routines have their own banked link register,
21333 so they never need to save it.
21334 Otherwise if we do not use the link register we do not need to save
21335 it. If we are pushing other registers onto the stack however, we
21336 can save an instruction in the epilogue by pushing the link register
21337 now and then popping it back into the PC. This incurs extra memory
21338 accesses though, so we only do it when optimizing for size, and only
21339 if we know that we will not need a fancy return sequence. */
21340 if (df_regs_ever_live_p (LR_REGNUM
)
21343 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
21344 && !crtl
->tail_call_emit
21345 && !crtl
->calls_eh_return
))
21346 save_reg_mask
|= 1 << LR_REGNUM
;
21348 if (cfun
->machine
->lr_save_eliminated
)
21349 save_reg_mask
&= ~ (1 << LR_REGNUM
);
/* iWMMXt targets need an even register count so the stack stays
   64-bit aligned before iWMMXt saves and locals.  */
21351 if (TARGET_REALLY_IWMMXT
21352 && ((bit_count (save_reg_mask
)
21353 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
21354 arm_compute_static_chain_stack_bytes())
21357 /* The total number of registers that are going to be pushed
21358 onto the stack is odd. We need to ensure that the stack
21359 is 64-bit aligned before we start to save iWMMXt registers,
21360 and also before we start to create locals. (A local variable
21361 might be a double or long long which we will load/store using
21362 an iWMMXt instruction). Therefore we need to push another
21363 ARM register, so that the stack will be 64-bit aligned. We
21364 try to avoid using the arg registers (r0 -r3) as they might be
21365 used to pass values in a tail call. */
21366 for (reg
= 4; reg
<= 12; reg
++)
21367 if ((save_reg_mask
& (1 << reg
)) == 0)
21371 save_reg_mask
|= (1 << reg
);
/* Fallback: no free high register -- use r3 and forbid sibcalls,
   since r3 may carry a tail-call argument.  */
21374 cfun
->machine
->sibcall_blocked
= 1;
21375 save_reg_mask
|= (1 << 3);
21379 /* We may need to push an additional register for use initializing the
21380 PIC base register. */
21381 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
21382 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
21384 reg
= thumb_find_work_register (1 << 4);
21385 if (!call_used_or_fixed_reg_p (reg
))
21386 save_reg_mask
|= (1 << reg
);
21389 return save_reg_mask
;
/* NOTE(review): damaged extraction -- statements/braces are missing (e.g.
   the initialization of MASK and the head of the PIC condition).  Code kept
   byte-identical; comments only added.  Purpose: Thumb-1 variant of the
   save-register mask computation.  */
21392 /* Compute a bit mask of which core registers need to be
21393 saved on the stack for the current function. */
21394 static unsigned long
21395 thumb1_compute_save_core_reg_mask (void)
21397 unsigned long mask
;
/* Scan r0..r11 for live callee-saved registers.  */
21401 for (reg
= 0; reg
< 12; reg
++)
21402 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
21405 /* Handle the frame pointer as a special case. */
21406 if (frame_pointer_needed
)
21407 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
/* NOTE(review): the leading condition of this PIC test was dropped by the
   extraction; the surviving clauses save the PIC base register.  */
21410 && !TARGET_SINGLE_PIC_BASE
21411 && arm_pic_register
!= INVALID_REGNUM
21412 && crtl
->uses_pic_offset_table
)
21413 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21415 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21416 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
21417 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
21419 /* LR will also be pushed if any lo regs are pushed. */
21420 if (mask
& 0xff || thumb_force_lr_save ())
21421 mask
|= (1 << LR_REGNUM
);
21423 bool call_clobbered_scratch
21424 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21425 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21427 /* Make sure we have a low work register if we need one. We will
21428 need one if we are going to push a high register, but we are not
21429 currently intending to push a low register. However if both the
21430 prologue and epilogue have a spare call-clobbered low register,
21431 then we won't need to find an additional work register. It does
21432 not need to be the same register in the prologue and
21434 if ((mask
& 0xff) == 0
21435 && !call_clobbered_scratch
21436 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
21438 /* Use thumb_find_work_register to choose which register
21439 we will use. If the register is live then we will
21440 have to push it. Use LAST_LO_REGNUM as our fallback
21441 choice for the register to select. */
21442 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
21443 /* Make sure the register returned by thumb_find_work_register is
21444 not part of the return value. */
21445 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
21446 reg
= LAST_LO_REGNUM
;
21448 if (callee_saved_reg_p (reg
))
21452 /* The 504 below is 8 bytes less than 512 because there are two possible
21453 alignment words. We can't tell here if they will be present or not so we
21454 have to play it safe and assume that they are. */
21455 if ((CALLER_INTERWORKING_SLOT_SIZE
+
21456 ROUND_UP_WORD (get_frame_size ()) +
21457 crtl
->outgoing_args_size
) >= 504)
21459 /* This is the same as the code in thumb1_expand_prologue() which
21460 determines which register to use for stack decrement. */
21461 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
21462 if (mask
& (1 << reg
))
21465 if (reg
> LAST_LO_REGNUM
)
21467 /* Make sure we have a register available for stack decrement. */
21468 mask
|= 1 << LAST_LO_REGNUM
;
/* NOTE(review): damaged extraction -- declarations of COUNT/SAVED, the loop
   body, and the return statement are missing.  Code kept byte-identical;
   comments only added.  Purpose: byte count needed to save VFP registers.  */
21475 /* Return the number of bytes required to save VFP registers. */
21477 arm_get_vfp_saved_size (void)
21479 unsigned int regno
;
21484 /* Space for saved VFP registers. */
21485 if (TARGET_VFP_BASE
)
/* VFP registers are considered in pairs (D registers): both halves
   must be dead for the pair to be skipped.  */
21488 for (regno
= FIRST_VFP_REGNUM
;
21489 regno
< LAST_VFP_REGNUM
;
21492 if (!reg_needs_saving_p (regno
) && !reg_needs_saving_p (regno
+ 1))
21496 /* Workaround ARM10 VFPr1 bug. */
21497 if (count
== 2 && !arm_arch6
)
21499 saved
+= count
* 8;
/* NOTE(review): the duplicated tail below handles the final run of
   consecutive saved registers after the loop.  */
21508 if (count
== 2 && !arm_arch6
)
21510 saved
+= count
* 8;
/* NOTE(review): damaged extraction -- numerous statements, braces, case
   labels and string continuations are missing.  Code kept byte-identical;
   comments only added.  Purpose: emit the textual assembly for a function
   return (pop of saved registers plus the return instruction proper),
   honoring interrupt/exception/CMSE/interworking variants.  */
21517 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21518 everything bar the final return instruction. If simple_return is true,
21519 then do not output epilogue, because it has already been emitted in RTL.
21521 Note: do not forget to update length attribute of corresponding insn pattern
21522 when changing assembly output (eg. length attribute of
21523 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21524 register clearing sequences). */
21526 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
21527 bool simple_return
)
21529 char conditional
[10];
21532 unsigned long live_regs_mask
;
21533 unsigned long func_type
;
21534 arm_stack_offsets
*offsets
;
21536 func_type
= arm_current_func_type ();
21538 if (IS_NAKED (func_type
))
21541 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
21543 /* If this function was declared non-returning, and we have
21544 found a tail call, then we have to trust that the called
21545 function won't return. */
21550 /* Otherwise, trap an attempted return by aborting. */
21552 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
21554 assemble_external_libcall (ops
[1]);
21555 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
21561 gcc_assert (!cfun
->calls_alloca
|| really_return
);
/* Build the condition suffix ("%?%d0" or "%?%D0") once; reused by
   every instruction printed below.  */
21563 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
21565 cfun
->machine
->return_used_this_function
= 1;
21567 offsets
= arm_get_frame_offsets ();
21568 live_regs_mask
= offsets
->saved_regs_mask
;
21570 if (!simple_return
&& live_regs_mask
)
21572 const char * return_reg
;
21574 /* If we do not have any special requirements for function exit
21575 (e.g. interworking) then we can load the return address
21576 directly into the PC. Otherwise we must load it into LR. */
21578 && !IS_CMSE_ENTRY (func_type
)
21579 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
21580 return_reg
= reg_names
[PC_REGNUM
];
21582 return_reg
= reg_names
[LR_REGNUM
];
21584 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
21586 /* There are three possible reasons for the IP register
21587 being saved. 1) a stack frame was created, in which case
21588 IP contains the old stack pointer, or 2) an ISR routine
21589 corrupted it, or 3) it was saved to align the stack on
21590 iWMMXt. In case 1, restore IP into SP, otherwise just
21592 if (frame_pointer_needed
)
21594 live_regs_mask
&= ~ (1 << IP_REGNUM
);
21595 live_regs_mask
|= (1 << SP_REGNUM
);
21598 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
21601 /* On some ARM architectures it is faster to use LDR rather than
21602 LDM to load a single register. On other architectures, the
21603 cost is the same. In 26 bit mode, or for exception handlers,
21604 we have to use LDM to load the PC so that the CPSR is also
21606 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
21607 if (live_regs_mask
== (1U << reg
))
21610 if (reg
<= LAST_ARM_REGNUM
21611 && (reg
!= LR_REGNUM
21613 || ! IS_INTERRUPT (func_type
)))
/* Single-register restore: a plain post-indexed LDR.  */
21615 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
21616 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
21623 /* Generate the load multiple instruction to restore the
21624 registers. Note we can get here, even if
21625 frame_pointer_needed is true, but only if sp already
21626 points to the base of the saved core registers. */
21627 if (live_regs_mask
& (1 << SP_REGNUM
))
21629 unsigned HOST_WIDE_INT stack_adjust
;
21631 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
21632 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
21634 if (stack_adjust
&& arm_arch5t
&& TARGET_ARM
)
21635 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
21638 /* If we can't use ldmib (SA110 bug),
21639 then try to pop r3 instead. */
21641 live_regs_mask
|= 1 << 3;
21643 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
21646 /* For interrupt returns we have to use an LDM rather than
21647 a POP so that we can use the exception return variant. */
21648 else if (IS_INTERRUPT (func_type
))
21649 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
21651 sprintf (instr
, "pop%s\t{", conditional
);
/* Append the register list to the mnemonic built above.  */
21653 p
= instr
+ strlen (instr
);
21655 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
21656 if (live_regs_mask
& (1 << reg
))
21658 int l
= strlen (reg_names
[reg
]);
21664 memcpy (p
, ", ", 2);
21668 memcpy (p
, "%|", 2);
21669 memcpy (p
+ 2, reg_names
[reg
], l
);
21673 if (live_regs_mask
& (1 << LR_REGNUM
))
21675 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
21676 /* If returning from an interrupt, restore the CPSR. */
21677 if (IS_INTERRUPT (func_type
))
21684 output_asm_insn (instr
, & operand
);
21686 /* See if we need to generate an extra instruction to
21687 perform the actual function return. */
21689 && func_type
!= ARM_FT_INTERWORKED
21690 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
21692 /* The return has already been handled
21693 by loading the LR into the PC. */
/* Select the return instruction by function type.  */
21700 switch ((int) ARM_FUNC_TYPE (func_type
))
21704 /* ??? This is wrong for unified assembly syntax. */
21705 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
21708 case ARM_FT_INTERWORKED
:
21709 gcc_assert (arm_arch5t
|| arm_arch4t
);
21710 sprintf (instr
, "bx%s\t%%|lr", conditional
);
21713 case ARM_FT_EXCEPTION
:
21714 /* ??? This is wrong for unified assembly syntax. */
21715 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
/* CMSE non-secure entry: scrub state before the BXNS return.  */
21719 if (IS_CMSE_ENTRY (func_type
))
21721 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21722 emitted by cmse_nonsecure_entry_clear_before_return () and the
21723 VSTR/VLDR instructions in the prologue and epilogue. */
21724 if (!TARGET_HAVE_FPCXT_CMSE
)
21726 /* Check if we have to clear the 'GE bits' which is only used if
21727 parallel add and subtraction instructions are available. */
21728 if (TARGET_INT_SIMD
)
21729 snprintf (instr
, sizeof (instr
),
21730 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
21732 snprintf (instr
, sizeof (instr
),
21733 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
21735 output_asm_insn (instr
, & operand
);
21736 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21738 if (TARGET_HARD_FLOAT
)
21740 /* Clear the cumulative exception-status bits (0-4,7) and
21741 the condition code bits (28-31) of the FPSCR. We need
21742 to remember to clear the first scratch register used
21743 (IP) and save and restore the second (r4).
21745 Important note: the length of the
21746 thumb2_cmse_entry_return insn pattern must account for
21747 the size of the below instructions. */
21748 output_asm_insn ("push\t{%|r4}", & operand
);
21749 output_asm_insn ("vmrs\t%|ip, fpscr", & operand
);
21750 output_asm_insn ("movw\t%|r4, #65376", & operand
);
21751 output_asm_insn ("movt\t%|r4, #4095", & operand
);
21752 output_asm_insn ("and\t%|ip, %|r4", & operand
);
21753 output_asm_insn ("vmsr\tfpscr, %|ip", & operand
);
21754 output_asm_insn ("pop\t{%|r4}", & operand
);
21755 output_asm_insn ("mov\t%|ip, %|lr", & operand
);
21758 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
21760 /* Use bx if it's available. */
21761 else if (arm_arch5t
|| arm_arch4t
)
21762 sprintf (instr
, "bx%s\t%%|lr", conditional
);
21764 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
21768 output_asm_insn (instr
, & operand
);
/* NOTE(review): damaged extraction -- return type, braces and some
   statements are missing.  Code kept byte-identical; comments only added.
   Purpose: emit the assembler directives declaring a function's name,
   including the extra "__acle_se_"-prefixed label for CMSE entry points.  */
21774 /* Output in FILE asm statements needed to declare the NAME of the function
21775 defined by its DECL node. */
21778 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
21780 size_t cmse_name_len
;
21781 char *cmse_name
= 0;
21782 char cmse_prefix
[] = "__acle_se_";
21784 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21785 extra function label for each function with the 'cmse_nonsecure_entry'
21786 attribute. This extra function label should be prepended with
21787 '__acle_se_', telling the linker that it needs to create secure gateway
21788 veneers for this function. */
21789 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
21790 DECL_ATTRIBUTES (decl
)))
/* sizeof (cmse_prefix) already includes the NUL terminator, so this
   length covers prefix + name + NUL.  */
21792 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
21793 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
21794 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
21795 targetm
.asm_out
.globalize_label (file
, cmse_name
);
21797 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
21798 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
21801 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
21802 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21803 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21804 ASM_OUTPUT_LABEL (file
, name
);
/* The secure-gateway label is emitted at the same address as NAME.  */
21807 ASM_OUTPUT_LABEL (file
, cmse_name
);
21809 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
21812 /* Write the function name into the code section, directly preceding
21813 the function prologue.
21815 Code will be output similar to this:
21817 .ascii "arm_poke_function_name", 0
21820 .word 0xff000000 + (t1 - t0)
21821 arm_poke_function_name
21823 stmfd sp!, {fp, ip, lr, pc}
21826 When performing a stack backtrace, code can inspect the value
21827 of 'pc' stored at 'fp' + 0. If the trace function then looks
21828 at location pc - 12 and the top 8 bits are set, then we know
21829 that there is a function name embedded immediately preceding this
21830 location and has length ((pc[-3]) & 0xff000000).
21832 We assume that pc is declared as a pointer to an unsigned long.
21834 It is of no benefit to output the function name if we are assembling
21835 a leaf function. These function types will not contain a stack
21836 backtrace structure, therefore it is not possible to determine the
21839 arm_poke_function_name (FILE *stream
, const char *name
)
21841 unsigned long alignlength
;
21842 unsigned long length
;
21845 length
= strlen (name
) + 1;
21846 alignlength
= ROUND_UP_WORD (length
);
21848 ASM_OUTPUT_ASCII (stream
, name
, length
);
21849 ASM_OUTPUT_ALIGN (stream
, 2);
21850 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
21851 assemble_aligned_integer (UNITS_PER_WORD
, x
);
/* NOTE(review): damaged extraction -- braces, some case labels and break
   statements are missing.  Code kept byte-identical; comments only added.
   Purpose: write descriptive assembler comments (function type, frame
   sizes, flags) ahead of the prologue.  */
21854 /* Place some comments into the assembler stream
21855 describing the current function. */
21857 arm_output_function_prologue (FILE *f
)
21859 unsigned long func_type
;
21861 /* Sanity check. */
21862 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
21864 func_type
= arm_current_func_type ();
/* One descriptive comment per function category.  */
21866 switch ((int) ARM_FUNC_TYPE (func_type
))
21869 case ARM_FT_NORMAL
:
21871 case ARM_FT_INTERWORKED
:
21872 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
21875 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
21878 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
21880 case ARM_FT_EXCEPTION
:
21881 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
/* Orthogonal attribute flags, each reported independently.  */
21885 if (IS_NAKED (func_type
))
21886 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21888 if (IS_VOLATILE (func_type
))
21889 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
21891 if (IS_NESTED (func_type
))
21892 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
21893 if (IS_STACKALIGN (func_type
))
21894 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21895 if (IS_CMSE_ENTRY (func_type
))
21896 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
21898 asm_fprintf (f
, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21899 (HOST_WIDE_INT
) crtl
->args
.size
,
21900 crtl
->args
.pretend_args_size
,
21901 (HOST_WIDE_INT
) get_frame_size ());
21903 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21904 frame_pointer_needed
,
21905 cfun
->machine
->uses_anonymous_args
);
21907 if (cfun
->machine
->lr_save_eliminated
)
21908 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
21910 if (crtl
->calls_eh_return
)
21911 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
/* NOTE(review): damaged extraction -- return type, braces and the TARGET_THUMB
   branch head are missing.  Code kept byte-identical; comments only added.
   Purpose: post-epilogue bookkeeping; emits v4t call-via-reg trampolines and
   sanity-checks the frame layout for 32-bit targets.  */
21916 arm_output_function_epilogue (FILE *)
21918 arm_stack_offsets
*offsets
;
21924 /* Emit any call-via-reg trampolines that are needed for v4t support
21925 of call_reg and call_value_reg type insns. */
21926 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
21928 rtx label
= cfun
->machine
->call_via
[regno
];
21932 switch_to_section (function_section (current_function_decl
));
21933 targetm
.asm_out
.internal_label (asm_out_file
, "L",
21934 CODE_LABEL_NUMBER (label
));
21935 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
21939 /* ??? Probably not safe to set this here, since it assumes that a
21940 function will be emitted as assembly immediately after we generate
21941 RTL for it. This does not happen for inline functions. */
21942 cfun
->machine
->return_used_this_function
= 0;
21944 else /* TARGET_32BIT */
21946 /* We need to take into account any stack-frame rounding. */
21947 offsets
= arm_get_frame_offsets ();
21949 gcc_assert (!use_return_insn (FALSE
, NULL
)
21950 || (cfun
->machine
->return_used_this_function
!= 0)
21951 || offsets
->saved_regs
== offsets
->outgoing_args
21952 || frame_pointer_needed
);
/* NOTE(review): damaged extraction -- declarations (i, regno, regno2, tmp),
   several operands of gen_frame_mem/plus_constant calls, loop increments and
   braces are missing.  Code kept byte-identical; comments only added.
   Purpose: emit a PUSH-equivalent prologue sequence using STR/STRD, with a
   parallel DWARF SEQUENCE describing each store and the SP adjustment.  */
21956 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21957 STR and STRD. If an even number of registers are being pushed, one
21958 or more STRD patterns are created for each register pair. If an
21959 odd number of registers are pushed, emit an initial STR followed by
21960 as many STRD instructions as are needed. This works best when the
21961 stack is initially 64-bit aligned (the normal case), since it
21962 ensures that each STRD is also 64-bit aligned. */
21964 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
21969 rtx par
= NULL_RTX
;
21970 rtx dwarf
= NULL_RTX
;
21974 num_regs
= bit_count (saved_regs_mask
);
21976 /* Must be at least one register to save, and can't save SP or PC. */
21977 gcc_assert (num_regs
> 0 && num_regs
<= 14);
21978 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
21979 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
21981 /* Create sequence for DWARF info. All the frame-related data for
21982 debugging is held in this wrapper. */
21983 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
21985 /* Describe the stack adjustment. */
21986 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21987 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
21988 RTX_FRAME_RELATED_P (tmp
) = 1;
21989 XVECEXP (dwarf
, 0, 0) = tmp
;
21991 /* Find the first register. */
21992 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
21997 /* If there's an odd number of registers to push. Start off by
21998 pushing a single register. This ensures that subsequent strd
21999 operations are dword aligned (assuming that SP was originally
22000 64-bit aligned). */
22001 if ((num_regs
& 1) != 0)
22003 rtx reg
, mem
, insn
;
22005 reg
= gen_rtx_REG (SImode
, regno
);
/* First alternative: pre-decrement store of the lone register.  */
22007 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
22008 stack_pointer_rtx
));
22010 mem
= gen_frame_mem (Pmode
,
22012 (Pmode
, stack_pointer_rtx
,
22013 plus_constant (Pmode
, stack_pointer_rtx
,
22016 tmp
= gen_rtx_SET (mem
, reg
);
22017 RTX_FRAME_RELATED_P (tmp
) = 1;
22018 insn
= emit_insn (tmp
);
22019 RTX_FRAME_RELATED_P (insn
) = 1;
22020 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22021 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
22022 RTX_FRAME_RELATED_P (tmp
) = 1;
22025 XVECEXP (dwarf
, 0, i
) = tmp
;
/* Remaining registers are pushed two at a time with STRD.  */
22029 while (i
< num_regs
)
22030 if (saved_regs_mask
& (1 << regno
))
22032 rtx reg1
, reg2
, mem1
, mem2
;
22033 rtx tmp0
, tmp1
, tmp2
;
22036 /* Find the register to pair with this one. */
22037 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
22041 reg1
= gen_rtx_REG (SImode
, regno
);
22042 reg2
= gen_rtx_REG (SImode
, regno2
);
/* First pair also performs the whole SP decrement (writeback).  */
22049 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22052 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22054 -4 * (num_regs
- 1)));
22055 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
22056 plus_constant (Pmode
, stack_pointer_rtx
,
22058 tmp1
= gen_rtx_SET (mem1
, reg1
);
22059 tmp2
= gen_rtx_SET (mem2
, reg2
);
22060 RTX_FRAME_RELATED_P (tmp0
) = 1;
22061 RTX_FRAME_RELATED_P (tmp1
) = 1;
22062 RTX_FRAME_RELATED_P (tmp2
) = 1;
22063 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
22064 XVECEXP (par
, 0, 0) = tmp0
;
22065 XVECEXP (par
, 0, 1) = tmp1
;
22066 XVECEXP (par
, 0, 2) = tmp2
;
22067 insn
= emit_insn (par
);
22068 RTX_FRAME_RELATED_P (insn
) = 1;
22069 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
/* Subsequent pairs use plain offset addressing (no writeback).  */
22073 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22076 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22079 tmp1
= gen_rtx_SET (mem1
, reg1
);
22080 tmp2
= gen_rtx_SET (mem2
, reg2
);
22081 RTX_FRAME_RELATED_P (tmp1
) = 1;
22082 RTX_FRAME_RELATED_P (tmp2
) = 1;
22083 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
22084 XVECEXP (par
, 0, 0) = tmp1
;
22085 XVECEXP (par
, 0, 1) = tmp2
;
22089 /* Create unwind information. This is an approximation. */
22090 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
22091 plus_constant (Pmode
,
22095 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
22096 plus_constant (Pmode
,
22101 RTX_FRAME_RELATED_P (tmp1
) = 1;
22102 RTX_FRAME_RELATED_P (tmp2
) = 1;
22103 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
22104 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
22106 regno
= regno2
+ 1;
/* NOTE(review): damaged extraction -- declarations (num_regs, offset, tmp,
   mem), several plus_constant operands, loop increments and braces are
   missing.  Code kept byte-identical; comments only added.
   Purpose: ARM-mode prologue store sequence preferring STRD for consecutive
   register pairs, falling back to single STRs; first store allocates the
   whole save area via SP writeback.  */
22114 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22115 whenever possible, otherwise it emits single-word stores. The first store
22116 also allocates stack space for all saved registers, using writeback with
22117 post-addressing mode. All other stores use offset addressing. If no STRD
22118 can be emitted, this function emits a sequence of single-word stores,
22119 and not an STM as before, because single-word stores provide more freedom
22120 scheduling and can be turned into an STM by peephole optimizations. */
22122 arm_emit_strd_push (unsigned long saved_regs_mask
)
22125 int i
, j
, dwarf_index
= 0;
22127 rtx dwarf
= NULL_RTX
;
22128 rtx insn
= NULL_RTX
;
22131 /* TODO: A more efficient code can be emitted by changing the
22132 layout, e.g., first push all pairs that can use STRD to keep the
22133 stack aligned, and then push all other registers. */
22134 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22135 if (saved_regs_mask
& (1 << i
))
22138 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
22139 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
22140 gcc_assert (num_regs
> 0);
22142 /* Create sequence for DWARF info. */
22143 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
22145 /* For dwarf info, we generate explicit stack update. */
22146 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22147 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
22148 RTX_FRAME_RELATED_P (tmp
) = 1;
22149 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22151 /* Save registers. */
22152 offset
= - 4 * num_regs
;
22154 while (j
<= LAST_ARM_REGNUM
)
22155 if (saved_regs_mask
& (1 << j
))
22158 && (saved_regs_mask
& (1 << (j
+ 1))))
22160 /* Current register and previous register form register pair for
22161 which STRD can be generated. */
22164 /* Allocate stack space for all saved registers. */
22165 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22166 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22167 mem
= gen_frame_mem (DImode
, tmp
);
22170 else if (offset
> 0)
22171 mem
= gen_frame_mem (DImode
,
22172 plus_constant (Pmode
,
22176 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
22178 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
22179 RTX_FRAME_RELATED_P (tmp
) = 1;
22180 tmp
= emit_insn (tmp
);
22182 /* Record the first store insn. */
22183 if (dwarf_index
== 1)
22186 /* Generate dwarf info. */
/* DWARF describes the DImode STRD as two SImode stores.  */
22187 mem
= gen_frame_mem (SImode
,
22188 plus_constant (Pmode
,
22191 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22192 RTX_FRAME_RELATED_P (tmp
) = 1;
22193 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22195 mem
= gen_frame_mem (SImode
,
22196 plus_constant (Pmode
,
22199 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
22200 RTX_FRAME_RELATED_P (tmp
) = 1;
22201 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22208 /* Emit a single word store. */
22211 /* Allocate stack space for all saved registers. */
22212 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22213 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22214 mem
= gen_frame_mem (SImode
, tmp
);
22217 else if (offset
> 0)
22218 mem
= gen_frame_mem (SImode
,
22219 plus_constant (Pmode
,
22223 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
22225 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22226 RTX_FRAME_RELATED_P (tmp
) = 1;
22227 tmp
= emit_insn (tmp
);
22229 /* Record the first store insn. */
22230 if (dwarf_index
== 1)
22233 /* Generate dwarf info. */
22234 mem
= gen_frame_mem (SImode
,
22235 plus_constant(Pmode
,
22238 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22239 RTX_FRAME_RELATED_P (tmp
) = 1;
22240 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22249 /* Attach dwarf info to the first insn we generate. */
22250 gcc_assert (insn
!= NULL_RTX
);
22251 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22252 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): damaged extraction -- declarations (num_regs, i, j, par,
   dwarf, reg, tmp), loop increments, some call operands and braces are
   missing.  Code kept byte-identical; comments only added.
   Purpose: emit a push_multi insn (UNSPEC_PUSH_MULT parallel) plus an
   explicit per-register DWARF frame note; substitutes RA_AUTH_CODE for IP
   in the unwind info when PAC is enabled.  */
22255 /* Generate and emit an insn that we will recognize as a push_multi.
22256 Unfortunately, since this insn does not reflect very well the actual
22257 semantics of the operation, we need to annotate the insn for the benefit
22258 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22259 MASK for registers that should be annotated for DWARF2 frame unwind
22262 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
22265 int num_dwarf_regs
= 0;
22269 int dwarf_par_index
;
22272 /* We don't record the PC in the dwarf frame information. */
22273 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
22275 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22277 if (mask
& (1 << i
))
22279 if (dwarf_regs_mask
& (1 << i
))
22283 gcc_assert (num_regs
&& num_regs
<= 16);
22284 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
22286 /* For the body of the insn we are going to generate an UNSPEC in
22287 parallel with several USEs. This allows the insn to be recognized
22288 by the push_multi pattern in the arm.md file.
22290 The body of the insn looks something like this:
22293 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22294 (const_int:SI <num>)))
22295 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22301 For the frame note however, we try to be more explicit and actually
22302 show each register being stored into the stack frame, plus a (single)
22303 decrement of the stack pointer. We do it this way in order to be
22304 friendly to the stack unwinding code, which only wants to see a single
22305 stack decrement per instruction. The RTL we generate for the note looks
22306 something like this:
22309 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22310 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22311 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22312 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22316 FIXME:: In an ideal world the PRE_MODIFY would not exist and
22317 instead we'd have a parallel expression detailing all
22318 the stores to the various memory addresses so that debug
22319 information is more up-to-date. Remember however while writing
22320 this to take care of the constraints with the push instruction.
22322 Note also that this has to be taken care of for the VFP registers.
22324 For more see PR43399. */
22326 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
22327 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
22328 dwarf_par_index
= 1;
/* Find the first pushed register; it carries the SET with the SP
   pre-modify, the remaining registers become USEs.  */
22330 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22332 if (mask
& (1 << i
))
22334 /* NOTE: Dwarf code emitter handle reg-reg copies correctly and in the
22335 following example reg-reg copy of SP to IP register is handled
22336 through .cfi_def_cfa_register directive and the .cfi_offset
22337 directive for IP register is skipped by dwarf code emitter.
22340 .cfi_def_cfa_register 12
22341 push {fp, ip, lr, pc}
22342 .cfi_offset 11, -16
22343 .cfi_offset 13, -12
22346 Where as Arm-specific .save directive handling is different to that
22347 of dwarf code emitter and it doesn't consider reg-reg copies while
22348 updating the register list. When PACBTI is enabled we manually
22349 updated the .save directive register list to use "ra_auth_code"
22350 (pseduo register 143) instead of IP register as shown in following
22354 .cfi_register 143, 12
22355 push {r3, r7, ip, lr}
22356 .save {r3, r7, ra_auth_code, lr}
22358 rtx dwarf_reg
= reg
= gen_rtx_REG (SImode
, i
);
22359 if (arm_current_function_pac_enabled_p () && i
== IP_REGNUM
)
22360 dwarf_reg
= gen_rtx_REG (SImode
, RA_AUTH_CODE
);
22362 XVECEXP (par
, 0, 0)
22363 = gen_rtx_SET (gen_frame_mem
22365 gen_rtx_PRE_MODIFY (Pmode
,
22368 (Pmode
, stack_pointer_rtx
,
22371 gen_rtx_UNSPEC (BLKmode
,
22372 gen_rtvec (1, reg
),
22373 UNSPEC_PUSH_MULT
));
22375 if (dwarf_regs_mask
& (1 << i
))
22377 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
22379 RTX_FRAME_RELATED_P (tmp
) = 1;
22380 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
22387 for (j
= 1, i
++; j
< num_regs
; i
++)
22389 if (mask
& (1 << i
))
22391 rtx dwarf_reg
= reg
= gen_rtx_REG (SImode
, i
);
22392 if (arm_current_function_pac_enabled_p () && i
== IP_REGNUM
)
22393 dwarf_reg
= gen_rtx_REG (SImode
, RA_AUTH_CODE
);
22395 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
22397 if (dwarf_regs_mask
& (1 << i
))
22400 = gen_rtx_SET (gen_frame_mem
22402 plus_constant (Pmode
, stack_pointer_rtx
,
22405 RTX_FRAME_RELATED_P (tmp
) = 1;
22406 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
22413 par
= emit_insn (par
);
22415 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22416 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
22417 RTX_FRAME_RELATED_P (tmp
) = 1;
22418 XVECEXP (dwarf
, 0, 0) = tmp
;
22420 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
22425 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22426 SIZE is the offset to be adjusted.
22427 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22429 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
22433 RTX_FRAME_RELATED_P (insn
) = 1;
22434 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
22435 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  /* If PC is popped, the parallel gets an extra leading (return) element.  */
  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	/* When PAC is enabled, the saved IP slot actually holds the
	   authentication code, so the DWARF info must name the
	   RA_AUTH_CODE pseudo register instead of IP.  */
	rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	  dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
					      NULL_RTX);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG, with the stack
   update applied to BASE_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug: widen a 2-register transfer so it does
     not hit the erratum.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  SAVED_REGS_MASK gives the set of registers to
   restore; SP must not be in the mask.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
					  plus_constant (Pmode,
							 stack_pointer_rtx,
							 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
	       [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the
	       registers to be loaded are generated in above given LDRD
	       pattern, and the pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  SP must not be in the mask (it is updated
     separately below).  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	/* LDRD needs an even-numbered first register and its successor
	   also saved; PC can never be an LDRD operand.  */
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped,
	       and we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info: both halves of the pair are restored.  */

	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
22873 /* Calculate the size of the return value that is passed in registers. */
22875 arm_size_return_regs (void)
22879 if (crtl
->return_rtx
!= 0)
22880 mode
= GET_MODE (crtl
->return_rtx
);
22882 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
22884 return GET_MODE_SIZE (mode
);
/* Return true if the current function needs to save/restore LR.
   LR must be saved unless its save was already proved eliminable, and
   it is needed whenever the function is a non-leaf, uses a far jump,
   or LR is live anyway.  */
/* NOTE(review): the middle conjunct (!crtl->is_leaf) is reconstructed from
   upstream GCC sources — confirm against the repository copy.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
22897 /* We do not know if r3 will be available because
22898 we do have an indirect tailcall happening in this
22899 particular case. */
22901 is_indirect_tailcall_p (rtx call
)
22903 rtx pat
= PATTERN (call
);
22905 /* Indirect tail call. */
22906 pat
= XVECEXP (pat
, 0, 0);
22907 if (GET_CODE (pat
) == SET
)
22908 pat
= SET_SRC (pat
);
22910 pat
= XEXP (XEXP (pat
, 0), 0);
22911 return REG_P (pat
);
/* Return true if r3 is used by any of the tail call insns in the
   current function.  Scans every sibcall edge into the exit block and
   checks whether the call either explicitly uses r3 or is indirect
   (in which case r3 may hold the call target).  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  /* No tail calls were emitted, so r3 cannot be used by one.  */
  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	/* The sibcall is the last real insn of the predecessor block,
	   possibly followed by notes/debug insns.  */
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
22988 /* Return cached stack offsets. */
22990 static arm_stack_offsets
*
22991 arm_get_frame_offsets (void)
22993 struct arm_stack_offsets
*offsets
;
22995 offsets
= &cfun
->machine
->stack_offsets
;
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  Results are stored in cfun->machine->stack_offsets.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;			/* Total bytes of saved registers.  */
  int core_saved;		/* Bytes of saved core registers only.  */
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (reg_needs_saving_p (regno))
	      saved += 8;	/* iWMMXt registers are 8 bytes wide.  */
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_VFP_BASE)
	saved += arm_get_vfp_saved_size ();

      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
	 nonecure entry functions with VSTR/VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
	saved += 4;
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;		/* Backtrace structure.  */
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      /* Found a padding register: save it too.  */
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  FROM and TO are register
   numbers from ELIMINABLE_REGS.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */

	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
23239 /* Given FROM and TO register numbers, say whether this elimination is
23240 allowed. Frame pointer elimination is automatically handled.
23242 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23243 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23244 pointer, we must eliminate FRAME_POINTER_REGNUM into
23245 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23246 ARG_POINTER_REGNUM. */
23249 arm_can_eliminate (const int from
, const int to
)
23251 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
23252 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
23253 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
23254 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  Covers the call-saved iWMMXt registers and the
   call-saved VFP D-registers (saved in maximal contiguous runs via
   vfp_emit_fstmd).  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  if (TARGET_REALLY_IWMMXT)
    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
      if (reg_needs_saving_p (reg))
	{
	  /* Push one 8-byte iWMMXt register with pre-decrement.  */
	  insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = gen_rtx_MEM (V2SImode, insn);
	  insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  saved_size += 8;
	}

  if (TARGET_VFP_BASE)
    {
      start_reg = FIRST_VFP_REGNUM;

      /* Walk D-register pairs, flushing each contiguous run of registers
	 that need saving as a single FSTMD.  */
      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      /* Flush the final run, if any.  */
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.

   OFFSETS is the current frame layout; the frame pointer is set to
   SP + (outgoing_args - locals_base).  For large offsets the constant is
   materialised first and a REG_FRAME_RELATED_EXPR note is added so the
   DWARF output still describes the simple SP + amount relationship.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  /* NOTE(review): the small-immediate threshold below is reconstructed from
     upstream GCC sources — confirm against the repository copy.  */
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      /* The multi-insn sequence obscures the net effect; describe it
	 explicitly for the unwinder.  */
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
23340 struct scratch_reg
{
/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  The result is stored in *SR; if no dead register
   is available, r2 or r3 is pushed to the stack to free it up.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  /* Prefer LR: it is saved in the prologue, so clobbering it is free.  */
  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      /* Otherwise look for any other saved (hence dead-on-entry) low or
	 callee-saved register.  */
      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  /* The chosen register may be live into the function; if so it
	     must be spilled around its use as a scratch.  */
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      /* Push the register and describe the SP adjustment for unwinding.  */
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
/* Release a scratch register obtained from the preceding function.
   If the register had been spilled, pop it back and describe the SP
   adjustment for the unwinder.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
23415 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23417 #if PROBE_INTERVAL > 4096
23418 #error Cannot use indexed addressing mode for stack probing
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.

   Three strategies are used depending on SIZE: a single probe, an
   unrolled sequence of probes, or a run-time probing loop that needs a
   second scratch register.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      /* The remainder may exceed the immediate-offset range of a probe
	 (4095 for ARM, 255 for Thumb-2), in which case step down first.  */
      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
23540 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23541 absolute addresses. */
23544 output_probe_stack_range (rtx reg1
, rtx reg2
)
23546 static int labelno
= 0;
23550 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
23553 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
23555 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23557 xops
[1] = GEN_INT (PROBE_INTERVAL
);
23558 output_asm_insn ("sub\t%0, %0, %1", xops
);
23560 /* Probe at TEST_ADDR. */
23561 output_asm_insn ("str\tr0, [%0, #0]", xops
);
23563 /* Test if TEST_ADDR == LAST_ADDR. */
23565 output_asm_insn ("cmp\t%0, %1", xops
);
23568 fputs ("\tbne\t", asm_out_file
);
23569 assemble_name_raw (asm_out_file
, loop_lab
);
23570 fputc ('\n', asm_out_file
);
23575 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23578 arm_expand_prologue (void)
23583 unsigned long live_regs_mask
;
23584 unsigned long func_type
;
23586 int saved_pretend_args
= 0;
23587 int saved_regs
= 0;
23588 unsigned HOST_WIDE_INT args_to_push
;
23589 HOST_WIDE_INT size
;
23590 arm_stack_offsets
*offsets
;
23593 func_type
= arm_current_func_type ();
23595 /* Naked functions don't have prologues. */
23596 if (IS_NAKED (func_type
))
23598 if (flag_stack_usage_info
)
23599 current_function_static_stack_size
= 0;
23603 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23604 args_to_push
= crtl
->args
.pretend_args_size
;
23606 /* Compute which register we will have to save onto the stack. */
23607 offsets
= arm_get_frame_offsets ();
23608 live_regs_mask
= offsets
->saved_regs_mask
;
23610 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
23612 if (IS_STACKALIGN (func_type
))
23616 /* Handle a word-aligned stack pointer. We generate the following:
23621 <save and restore r0 in normal prologue/epilogue>
23625 The unwinder doesn't need to know about the stack realignment.
23626 Just tell it we saved SP in r0. */
23627 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
23629 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
23630 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
23632 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
23633 RTX_FRAME_RELATED_P (insn
) = 1;
23634 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
23636 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
23638 /* ??? The CFA changes here, which may cause GDB to conclude that it
23639 has entered a different function. That said, the unwind info is
23640 correct, individually, before and after this instruction because
23641 we've described the save of SP, which will override the default
23642 handling of SP as restoring from the CFA. */
23643 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
23646 /* Let's compute the static_chain_stack_bytes required and store it. Right
23647 now the value must be -1 as stored by arm_init_machine_status (). */
23648 cfun
->machine
->static_chain_stack_bytes
23649 = arm_compute_static_chain_stack_bytes ();
23651 /* The static chain register is the same as the IP register. If it is
23652 clobbered when creating the frame, we need to save and restore it. */
23653 clobber_ip
= (IS_NESTED (func_type
)
23654 && (((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23655 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
23656 || flag_stack_clash_protection
)
23657 && !df_regs_ever_live_p (LR_REGNUM
)
23658 && arm_r3_live_at_start_p ()))
23659 || arm_current_function_pac_enabled_p ()));
23661 /* Find somewhere to store IP whilst the frame is being created.
23662 We try the following places in order:
23664 1. The last argument register r3 if it is available.
23665 2. A slot on the stack above the frame if there are no
23666 arguments to push onto the stack.
23667 3. Register r3 again, after pushing the argument registers
23668 onto the stack, if this is a varargs function.
23669 4. The last slot on the stack created for the arguments to
23670 push, if this isn't a varargs function.
23672 Note - we only need to tell the dwarf2 backend about the SP
23673 adjustment in the second variant; the static chain register
23674 doesn't need to be unwound, as it doesn't contain a value
23675 inherited from the caller. */
23678 if (!arm_r3_live_at_start_p ())
23679 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23680 else if (args_to_push
== 0)
23686 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23687 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23690 /* Just tell the dwarf backend that we adjusted SP. */
23691 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23692 plus_constant (Pmode
, stack_pointer_rtx
,
23694 RTX_FRAME_RELATED_P (insn
) = 1;
23695 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23696 if (arm_current_function_pac_enabled_p ())
23697 cfun
->machine
->pacspval_needed
= 1;
23701 /* Store the args on the stack. */
23702 if (cfun
->machine
->uses_anonymous_args
)
23704 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23705 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23706 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23707 saved_pretend_args
= 1;
23713 if (args_to_push
== 4)
23714 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23716 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
23717 plus_constant (Pmode
,
23721 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23723 /* Just tell the dwarf backend that we adjusted SP. */
23724 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23725 plus_constant (Pmode
, stack_pointer_rtx
,
23727 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23730 RTX_FRAME_RELATED_P (insn
) = 1;
23731 fp_offset
= args_to_push
;
23733 if (arm_current_function_pac_enabled_p ())
23734 cfun
->machine
->pacspval_needed
= 1;
23738 if (arm_current_function_pac_enabled_p ())
23740 /* If IP was clobbered we only emit a PAC instruction as the BTI
23741 one will be added before the push of the clobbered IP (if
23742 necessary) by the bti pass. */
23743 if (aarch_bti_enabled () && !clobber_ip
)
23744 insn
= emit_insn (gen_pacbti_nop ());
23746 insn
= emit_insn (gen_pac_nop ());
23748 rtx dwarf
= gen_rtx_SET (ip_rtx
, gen_rtx_REG (SImode
, RA_AUTH_CODE
));
23749 RTX_FRAME_RELATED_P (insn
) = 1;
23750 add_reg_note (insn
, REG_CFA_REGISTER
, dwarf
);
23753 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23755 if (IS_INTERRUPT (func_type
))
23757 /* Interrupt functions must not corrupt any registers.
23758 Creating a frame pointer however, corrupts the IP
23759 register, so we must push it first. */
23760 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
23762 /* Do not set RTX_FRAME_RELATED_P on this insn.
23763 The dwarf stack unwinding code only wants to see one
23764 stack decrement per function, and this is not it. If
23765 this instruction is labeled as being part of the frame
23766 creation sequence then dwarf2out_frame_debug_expr will
23767 die when it encounters the assignment of IP to FP
23768 later on, since the use of SP here establishes SP as
23769 the CFA register and not IP.
23771 Anyway this instruction is not really part of the stack
23772 frame creation although it is part of the prologue. */
23775 insn
= emit_set_insn (ip_rtx
,
23776 plus_constant (Pmode
, stack_pointer_rtx
,
23778 RTX_FRAME_RELATED_P (insn
) = 1;
23781 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23782 if (TARGET_HAVE_FPCXT_CMSE
&& IS_CMSE_ENTRY (func_type
))
23785 insn
= emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx
,
23786 GEN_INT (FPCXTNS_ENUM
)));
23787 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23788 plus_constant (Pmode
, stack_pointer_rtx
, -4));
23789 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23790 RTX_FRAME_RELATED_P (insn
) = 1;
23795 /* Push the argument registers, or reserve space for them. */
23796 if (cfun
->machine
->uses_anonymous_args
)
23797 insn
= emit_multi_reg_push
23798 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23799 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23802 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23803 GEN_INT (- args_to_push
)));
23804 RTX_FRAME_RELATED_P (insn
) = 1;
23807 /* If this is an interrupt service routine, and the link register
23808 is going to be pushed, and we're not generating extra
23809 push of IP (needed when frame is needed and frame layout if apcs),
23810 subtracting four from LR now will mean that the function return
23811 can be done with a single instruction. */
23812 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
23813 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
23814 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
23817 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
23819 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
23822 if (live_regs_mask
)
23824 unsigned long dwarf_regs_mask
= live_regs_mask
;
23826 saved_regs
+= bit_count (live_regs_mask
) * 4;
23827 if (optimize_size
&& !frame_pointer_needed
23828 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
23830 /* If no coprocessor registers are being pushed and we don't have
23831 to worry about a frame pointer then push extra registers to
23832 create the stack frame. This is done in a way that does not
23833 alter the frame layout, so is independent of the epilogue. */
23837 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
23839 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
23840 if (frame
&& n
* 4 >= frame
)
23843 live_regs_mask
|= (1 << n
) - 1;
23844 saved_regs
+= frame
;
23849 && current_tune
->prefer_ldrd_strd
23850 && !optimize_function_for_size_p (cfun
))
23852 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
23854 thumb2_emit_strd_push (live_regs_mask
);
23855 else if (TARGET_ARM
23856 && !TARGET_APCS_FRAME
23857 && !IS_INTERRUPT (func_type
))
23858 arm_emit_strd_push (live_regs_mask
);
23861 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
23862 RTX_FRAME_RELATED_P (insn
) = 1;
23867 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
23868 RTX_FRAME_RELATED_P (insn
) = 1;
23872 if (! IS_VOLATILE (func_type
))
23873 saved_regs
+= arm_save_coproc_regs ();
23875 if (frame_pointer_needed
&& TARGET_ARM
)
23877 /* Create the new frame pointer. */
23878 if (TARGET_APCS_FRAME
)
23880 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
23881 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
23882 RTX_FRAME_RELATED_P (insn
) = 1;
23886 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
23887 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23888 stack_pointer_rtx
, insn
));
23889 RTX_FRAME_RELATED_P (insn
) = 1;
23893 size
= offsets
->outgoing_args
- offsets
->saved_args
;
23894 if (flag_stack_usage_info
)
23895 current_function_static_stack_size
= size
;
23897 /* If this isn't an interrupt service routine and we have a frame, then do
23898 stack checking. We use IP as the first scratch register, except for the
23899 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23900 if (!IS_INTERRUPT (func_type
)
23901 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
23902 || flag_stack_clash_protection
))
23904 unsigned int regno
;
23906 if (!IS_NESTED (func_type
) || clobber_ip
)
23908 else if (df_regs_ever_live_p (LR_REGNUM
))
23913 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
23915 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
23916 arm_emit_probe_stack_range (get_stack_check_protect (),
23917 size
- get_stack_check_protect (),
23918 regno
, live_regs_mask
);
23921 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
23922 regno
, live_regs_mask
);
23925 /* Recover the static chain register. */
23928 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
23929 insn
= gen_rtx_REG (SImode
, 3);
23932 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
23933 insn
= gen_frame_mem (SImode
, insn
);
23935 emit_set_insn (ip_rtx
, insn
);
23936 emit_insn (gen_force_register_use (ip_rtx
));
23939 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
23941 /* This add can produce multiple insns for a large constant, so we
23942 need to get tricky. */
23943 rtx_insn
*last
= get_last_insn ();
23945 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
23946 - offsets
->outgoing_args
);
23948 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23952 last
= last
? NEXT_INSN (last
) : get_insns ();
23953 RTX_FRAME_RELATED_P (last
) = 1;
23955 while (last
!= insn
);
23957 /* If the frame pointer is needed, emit a special barrier that
23958 will prevent the scheduler from moving stores to the frame
23959 before the stack adjustment. */
23960 if (frame_pointer_needed
)
23961 emit_insn (gen_stack_tie (stack_pointer_rtx
,
23962 hard_frame_pointer_rtx
));
23966 if (frame_pointer_needed
&& TARGET_THUMB2
)
23967 thumb_set_frame_pointer (offsets
);
23969 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
23971 unsigned long mask
;
23973 mask
= live_regs_mask
;
23974 mask
&= THUMB2_WORK_REGS
;
23975 if (!IS_NESTED (func_type
))
23976 mask
|= (1 << IP_REGNUM
);
23977 arm_load_pic_register (mask
, NULL_RTX
);
23980 /* If we are profiling, make sure no instructions are scheduled before
23981 the call to mcount. Similarly if the user has requested no
23982 scheduling in the prolog. Similarly if we want non-call exceptions
23983 using the EABI unwinder, to prevent faulting instructions from being
23984 swapped with a stack adjustment. */
23985 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
23986 || (arm_except_unwind_info (&global_options
) == UI_TARGET
23987 && cfun
->can_throw_non_call_exceptions
))
23988 emit_insn (gen_blockage ());
23990 /* If the link register is being kept alive, with the return address in it,
23991 then make sure that it does not get reused by the ce2 pass. */
23992 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
23993 cfun
->machine
->lr_save_eliminated
= 1;
23996 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23998 arm_print_condition (FILE *stream
)
24000 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
24002 /* Branch conversion is not implemented for Thumb-2. */
24005 output_operand_lossage ("predicated Thumb instruction");
24008 if (current_insn_predicate
!= NULL
)
24010 output_operand_lossage
24011 ("predicated instruction in conditional sequence");
24015 fputs (arm_condition_codes
[arm_current_cc
], stream
);
24017 else if (current_insn_predicate
)
24019 enum arm_cond_code code
;
24023 output_operand_lossage ("predicated Thumb instruction");
24027 code
= get_arm_condition_code (current_insn_predicate
);
24028 fputs (arm_condition_codes
[code
], stream
);
24033 /* Globally reserved letters: acln
24034 Puncutation letters currently used: @_|?().!#
24035 Lower case letters currently used: bcdefhimpqtvwxyz
24036 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
24037 Letters previously used, but now deprecated/obsolete: sWXYZ.
24039 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24041 If CODE is 'd', then the X is a condition operand and the instruction
24042 should only be executed if the condition is true.
24043 if CODE is 'D', then the X is a condition operand and the instruction
24044 should only be executed if the condition is false: however, if the mode
24045 of the comparison is CCFPEmode, then always execute the instruction -- we
24046 do this because in these circumstances !GE does not necessarily imply LT;
24047 in these cases the instruction pattern will take care to make sure that
24048 an instruction containing %d will follow, thereby undoing the effects of
24049 doing this instruction unconditionally.
24050 If CODE is 'N' then X is a floating point operand that must be negated
24052 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24053 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24054 If CODE is 'V', then the operand must be a CONST_INT representing
24055 the bits to preserve in the modified register (Rd) of a BFI or BFC
24056 instruction: print out both the width and lsb (shift) fields. */
24058 arm_print_operand (FILE *stream
, rtx x
, int code
)
24063 fputs (ASM_COMMENT_START
, stream
);
24067 fputs (user_label_prefix
, stream
);
24071 fputs (REGISTER_PREFIX
, stream
);
24075 arm_print_condition (stream
);
24079 /* The current condition code for a condition code setting instruction.
24080 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24081 fputc('s', stream
);
24082 arm_print_condition (stream
);
24086 /* If the instruction is conditionally executed then print
24087 the current condition code, otherwise print 's'. */
24088 gcc_assert (TARGET_THUMB2
);
24089 if (current_insn_predicate
)
24090 arm_print_condition (stream
);
24092 fputc('s', stream
);
24095 /* %# is a "break" sequence. It doesn't output anything, but is used to
24096 separate e.g. operand numbers from following text, if that text consists
24097 of further digits which we don't want to be part of the operand
24105 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
24106 fprintf (stream
, "%s", fp_const_from_val (&r
));
24110 /* An integer or symbol address without a preceding # sign. */
24112 switch (GET_CODE (x
))
24115 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
24119 output_addr_const (stream
, x
);
24123 if (GET_CODE (XEXP (x
, 0)) == PLUS
24124 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
24126 output_addr_const (stream
, x
);
24129 /* Fall through. */
24132 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24136 /* An integer that we want to print in HEX. */
24138 switch (GET_CODE (x
))
24141 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
24145 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24150 if (CONST_INT_P (x
))
24153 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
24154 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
24158 putc ('~', stream
);
24159 output_addr_const (stream
, x
);
24164 /* Print the log2 of a CONST_INT. */
24168 if (!CONST_INT_P (x
)
24169 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
24170 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24172 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
24177 /* The low 16 bits of an immediate constant. */
24178 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
24182 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
24186 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
24194 shift
= shift_op (x
, &val
);
24198 fprintf (stream
, ", %s ", shift
);
24200 arm_print_operand (stream
, XEXP (x
, 1), 0);
24202 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
24207 /* An explanation of the 'Q', 'R' and 'H' register operands:
24209 In a pair of registers containing a DI or DF value the 'Q'
24210 operand returns the register number of the register containing
24211 the least significant part of the value. The 'R' operand returns
24212 the register number of the register containing the most
24213 significant part of the value.
24215 The 'H' operand returns the higher of the two register numbers.
24216 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24217 same as the 'Q' operand, since the most significant part of the
24218 value is held in the lower number register. The reverse is true
24219 on systems where WORDS_BIG_ENDIAN is false.
24221 The purpose of these operands is to distinguish between cases
24222 where the endian-ness of the values is important (for example
24223 when they are added together), and cases where the endian-ness
24224 is irrelevant, but the order of register operations is important.
24225 For example when loading a value from memory into a register
24226 pair, the endian-ness does not matter. Provided that the value
24227 from the lower memory address is put into the lower numbered
24228 register, and the value from the higher address is put into the
24229 higher numbered register, the load will work regardless of whether
24230 the value being loaded is big-wordian or little-wordian. The
24231 order of the two register loads can matter however, if the address
24232 of the memory location is actually held in one of the registers
24233 being overwritten by the load.
24235 The 'Q' and 'R' constraints are also available for 64-bit
24238 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24240 rtx part
= gen_lowpart (SImode
, x
);
24241 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24245 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24247 output_operand_lossage ("invalid operand for code '%c'", code
);
24251 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
24255 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24257 machine_mode mode
= GET_MODE (x
);
24260 if (mode
== VOIDmode
)
24262 part
= gen_highpart_mode (SImode
, mode
, x
);
24263 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24267 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24269 output_operand_lossage ("invalid operand for code '%c'", code
);
24273 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
24277 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24279 output_operand_lossage ("invalid operand for code '%c'", code
);
24283 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
24287 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24289 output_operand_lossage ("invalid operand for code '%c'", code
);
24293 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
24297 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24299 output_operand_lossage ("invalid operand for code '%c'", code
);
24303 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
24307 asm_fprintf (stream
, "%r",
24308 REG_P (XEXP (x
, 0))
24309 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
24313 asm_fprintf (stream
, "{%r-%r}",
24315 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
24318 /* Like 'M', but writing doubleword vector registers, for use by Neon
24322 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
24323 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
24325 asm_fprintf (stream
, "{d%d}", regno
);
24327 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
24332 /* CONST_TRUE_RTX means always -- that's the default. */
24333 if (x
== const_true_rtx
)
24336 if (!COMPARISON_P (x
))
24338 output_operand_lossage ("invalid operand for code '%c'", code
);
24342 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
24347 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24348 want to do that. */
24349 if (x
== const_true_rtx
)
24351 output_operand_lossage ("instruction never executed");
24354 if (!COMPARISON_P (x
))
24356 output_operand_lossage ("invalid operand for code '%c'", code
);
24360 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
24361 (get_arm_condition_code (x
))],
24367 /* Output the LSB (shift) and width for a bitmask instruction
24368 based on a literal mask. The LSB is printed first,
24369 followed by the width.
24371 Eg. For 0b1...1110001, the result is #1, #3. */
24372 if (!CONST_INT_P (x
))
24374 output_operand_lossage ("invalid operand for code '%c'", code
);
24378 unsigned HOST_WIDE_INT val
24379 = ~UINTVAL (x
) & HOST_WIDE_INT_UC (0xffffffff);
24380 int lsb
= exact_log2 (val
& -val
);
24381 asm_fprintf (stream
, "#%d, #%d", lsb
,
24382 (exact_log2 (val
+ (val
& -val
)) - lsb
));
24391 /* Former Maverick support, removed after GCC-4.7. */
24392 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
24397 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
24398 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
24399 /* Bad value for wCG register number. */
24401 output_operand_lossage ("invalid operand for code '%c'", code
);
24406 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
24409 /* Print an iWMMXt control register name. */
24411 if (!CONST_INT_P (x
)
24413 || INTVAL (x
) >= 16)
24414 /* Bad value for wC register number. */
24416 output_operand_lossage ("invalid operand for code '%c'", code
);
24422 static const char * wc_reg_names
[16] =
24424 "wCID", "wCon", "wCSSF", "wCASF",
24425 "wC4", "wC5", "wC6", "wC7",
24426 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24427 "wC12", "wC13", "wC14", "wC15"
24430 fputs (wc_reg_names
[INTVAL (x
)], stream
);
24434 /* Print the high single-precision register of a VFP double-precision
24438 machine_mode mode
= GET_MODE (x
);
24441 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
24443 output_operand_lossage ("invalid operand for code '%c'", code
);
24448 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
24450 output_operand_lossage ("invalid operand for code '%c'", code
);
24454 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
24458 /* Print a VFP/Neon double precision or quad precision register name. */
24462 machine_mode mode
= GET_MODE (x
);
24463 int is_quad
= (code
== 'q');
24466 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
24468 output_operand_lossage ("invalid operand for code '%c'", code
);
24473 || !IS_VFP_REGNUM (REGNO (x
)))
24475 output_operand_lossage ("invalid operand for code '%c'", code
);
24480 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
24481 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
24483 output_operand_lossage ("invalid operand for code '%c'", code
);
24487 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
24488 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
24492 /* These two codes print the low/high doubleword register of a Neon quad
24493 register, respectively. For pair-structure types, can also print
24494 low/high quadword registers. */
24498 machine_mode mode
= GET_MODE (x
);
24501 if ((GET_MODE_SIZE (mode
) != 16
24502 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
24504 output_operand_lossage ("invalid operand for code '%c'", code
);
24509 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
24511 output_operand_lossage ("invalid operand for code '%c'", code
);
24515 if (GET_MODE_SIZE (mode
) == 16)
24516 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
24517 + (code
== 'f' ? 1 : 0));
24519 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
24520 + (code
== 'f' ? 1 : 0));
24524 /* Print a VFPv3 floating-point constant, represented as an integer
24528 int index
= vfp3_const_double_index (x
);
24529 gcc_assert (index
!= -1);
24530 fprintf (stream
, "%d", index
);
24534 /* Print bits representing opcode features for Neon.
24536 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24537 and polynomials as unsigned.
24539 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24541 Bit 2 is 1 for rounding functions, 0 otherwise. */
24543 /* Identify the type as 's', 'u', 'p' or 'f'. */
24546 HOST_WIDE_INT bits
= INTVAL (x
);
24547 fputc ("uspf"[bits
& 3], stream
);
24551 /* Likewise, but signed and unsigned integers are both 'i'. */
24554 HOST_WIDE_INT bits
= INTVAL (x
);
24555 fputc ("iipf"[bits
& 3], stream
);
24559 /* As for 'T', but emit 'u' instead of 'p'. */
24562 HOST_WIDE_INT bits
= INTVAL (x
);
24563 fputc ("usuf"[bits
& 3], stream
);
24567 /* Bit 2: rounding (vs none). */
24570 HOST_WIDE_INT bits
= INTVAL (x
);
24571 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
24575 /* Memory operand for vld1/vst1 instruction. */
24579 bool postinc
= FALSE
;
24580 rtx postinc_reg
= NULL
;
24581 unsigned align
, memsize
, align_bits
;
24583 gcc_assert (MEM_P (x
));
24584 addr
= XEXP (x
, 0);
24585 if (GET_CODE (addr
) == POST_INC
)
24588 addr
= XEXP (addr
, 0);
24590 if (GET_CODE (addr
) == POST_MODIFY
)
24592 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
24593 addr
= XEXP (addr
, 0);
24595 asm_fprintf (stream
, "[%r", REGNO (addr
));
24597 /* We know the alignment of this access, so we can emit a hint in the
24598 instruction (for some alignments) as an aid to the memory subsystem
24600 align
= MEM_ALIGN (x
) >> 3;
24601 memsize
= MEM_SIZE (x
);
24603 /* Only certain alignment specifiers are supported by the hardware. */
24604 if (memsize
== 32 && (align
% 32) == 0)
24606 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
24608 else if (memsize
>= 8 && (align
% 8) == 0)
24613 if (align_bits
!= 0)
24614 asm_fprintf (stream
, ":%d", align_bits
);
24616 asm_fprintf (stream
, "]");
24619 fputs("!", stream
);
24621 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
24625 /* To print the memory operand with "Ux" or "Uj" constraint. Based on the
24626 rtx_code the memory operands output looks like following.
24628 2. [Rn, #+/-<imm>]!
24634 rtx postinc_reg
= NULL
;
24635 unsigned inc_val
= 0;
24636 enum rtx_code code
;
24638 gcc_assert (MEM_P (x
));
24639 addr
= XEXP (x
, 0);
24640 code
= GET_CODE (addr
);
24641 if (code
== POST_INC
|| code
== POST_DEC
|| code
== PRE_INC
24642 || code
== PRE_DEC
)
24644 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24645 inc_val
= GET_MODE_SIZE (GET_MODE (x
));
24646 if (code
== POST_INC
|| code
== POST_DEC
)
24647 asm_fprintf (stream
, "], #%s%d",(code
== POST_INC
)
24648 ? "": "-", inc_val
);
24650 asm_fprintf (stream
, ", #%s%d]!",(code
== PRE_INC
)
24651 ? "": "-", inc_val
);
24653 else if (code
== POST_MODIFY
|| code
== PRE_MODIFY
)
24655 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24656 postinc_reg
= XEXP (XEXP (addr
, 1), 1);
24657 if (postinc_reg
&& CONST_INT_P (postinc_reg
))
24659 if (code
== POST_MODIFY
)
24660 asm_fprintf (stream
, "], #%wd",INTVAL (postinc_reg
));
24662 asm_fprintf (stream
, ", #%wd]!",INTVAL (postinc_reg
));
24665 else if (code
== PLUS
)
24667 rtx base
= XEXP (addr
, 0);
24668 rtx index
= XEXP (addr
, 1);
24670 gcc_assert (REG_P (base
) && CONST_INT_P (index
));
24672 HOST_WIDE_INT offset
= INTVAL (index
);
24673 asm_fprintf (stream
, "[%r, #%wd]", REGNO (base
), offset
);
24677 gcc_assert (REG_P (addr
));
24678 asm_fprintf (stream
, "[%r]",REGNO (addr
));
24687 gcc_assert (MEM_P (x
));
24688 addr
= XEXP (x
, 0);
24689 gcc_assert (REG_P (addr
));
24690 asm_fprintf (stream
, "[%r]", REGNO (addr
));
24694 /* Translate an S register number into a D register number and element index. */
24697 machine_mode mode
= GET_MODE (x
);
24700 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
24702 output_operand_lossage ("invalid operand for code '%c'", code
);
24707 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24709 output_operand_lossage ("invalid operand for code '%c'", code
);
24713 regno
= regno
- FIRST_VFP_REGNUM
;
24714 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
24719 gcc_assert (CONST_DOUBLE_P (x
));
24721 result
= vfp3_const_double_for_fract_bits (x
);
24723 result
= vfp3_const_double_for_bits (x
);
24724 fprintf (stream
, "#%d", result
);
24727 /* Register specifier for vld1.16/vst1.16. Translate the S register
24728 number into a D register number and element index. */
24731 machine_mode mode
= GET_MODE (x
);
24734 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
24736 output_operand_lossage ("invalid operand for code '%c'", code
);
24741 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24743 output_operand_lossage ("invalid operand for code '%c'", code
);
24747 regno
= regno
- FIRST_VFP_REGNUM
;
24748 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
24755 output_operand_lossage ("missing operand");
24759 switch (GET_CODE (x
))
24762 asm_fprintf (stream
, "%r", REGNO (x
));
24766 output_address (GET_MODE (x
), XEXP (x
, 0));
24772 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
24773 sizeof (fpstr
), 0, 1);
24774 fprintf (stream
, "#%s", fpstr
);
24779 gcc_assert (GET_CODE (x
) != NEG
);
24780 fputc ('#', stream
);
24781 if (GET_CODE (x
) == HIGH
)
24783 fputs (":lower16:", stream
);
24787 output_addr_const (stream
, x
);
24793 /* Target hook for printing a memory address. */
24795 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
24799 int is_minus
= GET_CODE (x
) == MINUS
;
24802 asm_fprintf (stream
, "[%r]", REGNO (x
));
24803 else if (GET_CODE (x
) == PLUS
|| is_minus
)
24805 rtx base
= XEXP (x
, 0);
24806 rtx index
= XEXP (x
, 1);
24807 HOST_WIDE_INT offset
= 0;
24809 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
24811 /* Ensure that BASE is a register. */
24812 /* (one of them must be). */
24813 /* Also ensure the SP is not used as in index register. */
24814 std::swap (base
, index
);
24816 switch (GET_CODE (index
))
24819 offset
= INTVAL (index
);
24822 asm_fprintf (stream
, "[%r, #%wd]",
24823 REGNO (base
), offset
);
24827 asm_fprintf (stream
, "[%r, %s%r]",
24828 REGNO (base
), is_minus
? "-" : "",
24838 asm_fprintf (stream
, "[%r, %s%r",
24839 REGNO (base
), is_minus
? "-" : "",
24840 REGNO (XEXP (index
, 0)));
24841 arm_print_operand (stream
, index
, 'S');
24842 fputs ("]", stream
);
24847 gcc_unreachable ();
24850 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
24851 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
24853 gcc_assert (REG_P (XEXP (x
, 0)));
24855 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
24856 asm_fprintf (stream
, "[%r, #%s%d]!",
24857 REGNO (XEXP (x
, 0)),
24858 GET_CODE (x
) == PRE_DEC
? "-" : "",
24859 GET_MODE_SIZE (mode
));
24860 else if (TARGET_HAVE_MVE
&& (mode
== OImode
|| mode
== XImode
))
24861 asm_fprintf (stream
, "[%r]!", REGNO (XEXP (x
,0)));
24863 asm_fprintf (stream
, "[%r], #%s%d", REGNO (XEXP (x
, 0)),
24864 GET_CODE (x
) == POST_DEC
? "-" : "",
24865 GET_MODE_SIZE (mode
));
24867 else if (GET_CODE (x
) == PRE_MODIFY
)
24869 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
24870 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24871 asm_fprintf (stream
, "#%wd]!",
24872 INTVAL (XEXP (XEXP (x
, 1), 1)));
24874 asm_fprintf (stream
, "%r]!",
24875 REGNO (XEXP (XEXP (x
, 1), 1)));
24877 else if (GET_CODE (x
) == POST_MODIFY
)
24879 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
24880 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24881 asm_fprintf (stream
, "#%wd",
24882 INTVAL (XEXP (XEXP (x
, 1), 1)));
24884 asm_fprintf (stream
, "%r",
24885 REGNO (XEXP (XEXP (x
, 1), 1)));
24887 else output_addr_const (stream
, x
);
24892 asm_fprintf (stream
, "[%r]", REGNO (x
));
24893 else if (GET_CODE (x
) == POST_INC
)
24894 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
24895 else if (GET_CODE (x
) == PLUS
)
24897 gcc_assert (REG_P (XEXP (x
, 0)));
24898 if (CONST_INT_P (XEXP (x
, 1)))
24899 asm_fprintf (stream
, "[%r, #%wd]",
24900 REGNO (XEXP (x
, 0)),
24901 INTVAL (XEXP (x
, 1)));
24903 asm_fprintf (stream
, "[%r, %r]",
24904 REGNO (XEXP (x
, 0)),
24905 REGNO (XEXP (x
, 1)));
24908 output_addr_const (stream
, x
);
24912 /* Target hook for indicating whether a punctuation character for
24913 TARGET_PRINT_OPERAND is valid. */
24915 arm_print_operand_punct_valid_p (unsigned char code
)
24917 return (code
== '@' || code
== '|' || code
== '.'
24918 || code
== '(' || code
== ')' || code
== '#'
24919 || (TARGET_32BIT
&& (code
== '?'))
24920 || (TARGET_THUMB2
&& (code
== '!'))
24921 || (TARGET_THUMB
&& (code
== '_')));
24924 /* Target hook for assembling integer objects. The ARM version needs to
24925 handle word-sized values specially. */
24927 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
24931 if (size
== UNITS_PER_WORD
&& aligned_p
)
24933 fputs ("\t.word\t", asm_out_file
);
24934 output_addr_const (asm_out_file
, x
);
24936 /* Mark symbols as position independent. We only do this in the
24937 .text segment, not in the .data segment. */
24938 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
24939 (SYMBOL_REF_P (x
) || LABEL_REF_P (x
)))
24941 /* See legitimize_pic_address for an explanation of the
24942 TARGET_VXWORKS_RTP check. */
24943 /* References to weak symbols cannot be resolved locally:
24944 they may be overridden by a non-weak definition at link
24946 if (!arm_pic_data_is_text_relative
24947 || (SYMBOL_REF_P (x
)
24948 && (!SYMBOL_REF_LOCAL_P (x
)
24949 || (SYMBOL_REF_DECL (x
)
24950 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0)
24951 || (SYMBOL_REF_FUNCTION_P (x
)
24952 && !arm_fdpic_local_funcdesc_p (x
)))))
24954 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
24955 fputs ("(GOTFUNCDESC)", asm_out_file
);
24957 fputs ("(GOT)", asm_out_file
);
24961 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
24962 fputs ("(GOTOFFFUNCDESC)", asm_out_file
);
24968 || arm_is_segment_info_known (x
, &is_readonly
))
24969 fputs ("(GOTOFF)", asm_out_file
);
24971 fputs ("(GOT)", asm_out_file
);
24976 /* For FDPIC we also have to mark symbol for .data section. */
24978 && !making_const_table
24979 && SYMBOL_REF_P (x
)
24980 && SYMBOL_REF_FUNCTION_P (x
))
24981 fputs ("(FUNCDESC)", asm_out_file
);
24983 fputc ('\n', asm_out_file
);
24987 mode
= GET_MODE (x
);
24989 if (arm_vector_mode_supported_p (mode
))
24993 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
24995 units
= CONST_VECTOR_NUNITS (x
);
24996 size
= GET_MODE_UNIT_SIZE (mode
);
24998 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
24999 for (i
= 0; i
< units
; i
++)
25001 rtx elt
= CONST_VECTOR_ELT (x
, i
);
25003 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
25006 for (i
= 0; i
< units
; i
++)
25008 rtx elt
= CONST_VECTOR_ELT (x
, i
);
25010 (*CONST_DOUBLE_REAL_VALUE (elt
),
25011 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
25012 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
25018 return default_assemble_integer (x
, size
, aligned_p
);
25022 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
25026 if (!TARGET_AAPCS_BASED
)
25029 default_named_section_asm_out_constructor
25030 : default_named_section_asm_out_destructor
) (symbol
, priority
);
25034 /* Put these in the .init_array section, using a special relocation. */
25035 if (priority
!= DEFAULT_INIT_PRIORITY
)
25038 sprintf (buf
, "%s.%.5u",
25039 is_ctor
? ".init_array" : ".fini_array",
25041 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
25048 switch_to_section (s
);
25049 assemble_align (POINTER_SIZE
);
25050 fputs ("\t.word\t", asm_out_file
);
25051 output_addr_const (asm_out_file
, symbol
);
25052 fputs ("(target1)\n", asm_out_file
);
25055 /* Add a function to the list of static constructors. */
25058 arm_elf_asm_constructor (rtx symbol
, int priority
)
25060 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
25063 /* Add a function to the list of static destructors. */
25066 arm_elf_asm_destructor (rtx symbol
, int priority
)
25068 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
25071 /* A finite state machine takes care of noticing whether or not instructions
25072 can be conditionally executed, and thus decrease execution time and code
25073 size by deleting branch instructions. The fsm is controlled by
25074 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25076 /* The state of the fsm controlling condition codes are:
25077 0: normal, do nothing special
25078 1: make ASM_OUTPUT_OPCODE not output this instruction
25079 2: make ASM_OUTPUT_OPCODE not output this instruction
25080 3: make instructions conditional
25081 4: make instructions conditional
25083 State transitions (state->state by whom under condition):
25084 0 -> 1 final_prescan_insn if the `target' is a label
25085 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25086 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25087 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25088 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25089 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25090 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25091 (the target insn is arm_target_insn).
25093 If the jump clobbers the conditions then we use states 2 and 4.
25095 A similar thing can be done with conditional return insns.
25097 XXX In case the `target' is an unconditional branch, this conditionalising
25098 of the instructions always reduces code size, but not always execution
25099 time. But then, I want to reduce the code size to somewhere near what
25100 /bin/cc produces. */
25102 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25103 instructions. When a COND_EXEC instruction is seen the subsequent
25104 instructions are scanned so that multiple conditional instructions can be
25105 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25106 specify the length and true/false mask for the IT block. These will be
25107 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
25109 /* Returns the index of the ARM condition code string in
25110 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25111 COMPARISON should be an rtx like `(eq (...) (...))'. */
25114 maybe_get_arm_condition_code (rtx comparison
)
25116 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
25117 enum arm_cond_code code
;
25118 enum rtx_code comp_code
= GET_CODE (comparison
);
25120 if (GET_MODE_CLASS (mode
) != MODE_CC
)
25121 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
25122 XEXP (comparison
, 1));
25126 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
25127 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
25128 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
25129 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
25130 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
25131 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
25132 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
25133 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
25134 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
25135 case E_CC_DLTUmode
: code
= ARM_CC
;
25138 if (comp_code
== EQ
)
25139 return ARM_INVERSE_CONDITION_CODE (code
);
25140 if (comp_code
== NE
)
25147 case NE
: return ARM_NE
;
25148 case EQ
: return ARM_EQ
;
25149 case GE
: return ARM_PL
;
25150 case LT
: return ARM_MI
;
25151 default: return ARM_NV
;
25157 case NE
: return ARM_NE
;
25158 case EQ
: return ARM_EQ
;
25159 default: return ARM_NV
;
25165 case NE
: return ARM_MI
;
25166 case EQ
: return ARM_PL
;
25167 default: return ARM_NV
;
25172 /* We can handle all cases except UNEQ and LTGT. */
25175 case GE
: return ARM_GE
;
25176 case GT
: return ARM_GT
;
25177 case LE
: return ARM_LS
;
25178 case LT
: return ARM_MI
;
25179 case NE
: return ARM_NE
;
25180 case EQ
: return ARM_EQ
;
25181 case ORDERED
: return ARM_VC
;
25182 case UNORDERED
: return ARM_VS
;
25183 case UNLT
: return ARM_LT
;
25184 case UNLE
: return ARM_LE
;
25185 case UNGT
: return ARM_HI
;
25186 case UNGE
: return ARM_PL
;
25187 /* UNEQ and LTGT do not have a representation. */
25188 case UNEQ
: /* Fall through. */
25189 case LTGT
: /* Fall through. */
25190 default: return ARM_NV
;
25196 case NE
: return ARM_NE
;
25197 case EQ
: return ARM_EQ
;
25198 case GE
: return ARM_LE
;
25199 case GT
: return ARM_LT
;
25200 case LE
: return ARM_GE
;
25201 case LT
: return ARM_GT
;
25202 case GEU
: return ARM_LS
;
25203 case GTU
: return ARM_CC
;
25204 case LEU
: return ARM_CS
;
25205 case LTU
: return ARM_HI
;
25206 default: return ARM_NV
;
25212 case LTU
: return ARM_CS
;
25213 case GEU
: return ARM_CC
;
25214 default: return ARM_NV
;
25220 case GE
: return ARM_GE
;
25221 case LT
: return ARM_LT
;
25222 default: return ARM_NV
;
25228 case GEU
: return ARM_CS
;
25229 case LTU
: return ARM_CC
;
25230 default: return ARM_NV
;
25236 case NE
: return ARM_VS
;
25237 case EQ
: return ARM_VC
;
25238 default: return ARM_NV
;
25244 case GEU
: return ARM_CS
;
25245 case LTU
: return ARM_CC
;
25246 default: return ARM_NV
;
25253 case NE
: return ARM_NE
;
25254 case EQ
: return ARM_EQ
;
25255 case GE
: return ARM_GE
;
25256 case GT
: return ARM_GT
;
25257 case LE
: return ARM_LE
;
25258 case LT
: return ARM_LT
;
25259 case GEU
: return ARM_CS
;
25260 case GTU
: return ARM_HI
;
25261 case LEU
: return ARM_LS
;
25262 case LTU
: return ARM_CC
;
25263 default: return ARM_NV
;
25266 default: gcc_unreachable ();
25270 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25271 static enum arm_cond_code
25272 get_arm_condition_code (rtx comparison
)
25274 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
25275 gcc_assert (code
!= ARM_NV
);
25279 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25280 code registers when not targetting Thumb1. The VFP condition register
25281 only exists when generating hard-float code. */
25283 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
25289 *p2
= TARGET_VFP_BASE
? VFPCC_REGNUM
: INVALID_REGNUM
;
25293 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25296 thumb2_final_prescan_insn (rtx_insn
*insn
)
25298 rtx_insn
*first_insn
= insn
;
25299 rtx body
= PATTERN (insn
);
25301 enum arm_cond_code code
;
25306 /* max_insns_skipped in the tune was already taken into account in the
25307 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
25308 just emit the IT blocks as we can. It does not make sense to split
25310 max
= MAX_INSN_PER_IT_BLOCK
;
25312 /* Remove the previous insn from the count of insns to be output. */
25313 if (arm_condexec_count
)
25314 arm_condexec_count
--;
25316 /* Nothing to do if we are already inside a conditional block. */
25317 if (arm_condexec_count
)
25320 if (GET_CODE (body
) != COND_EXEC
)
25323 /* Conditional jumps are implemented directly. */
25327 predicate
= COND_EXEC_TEST (body
);
25328 arm_current_cc
= get_arm_condition_code (predicate
);
25330 n
= get_attr_ce_count (insn
);
25331 arm_condexec_count
= 1;
25332 arm_condexec_mask
= (1 << n
) - 1;
25333 arm_condexec_masklen
= n
;
25334 /* See if subsequent instructions can be combined into the same block. */
25337 insn
= next_nonnote_insn (insn
);
25339 /* Jumping into the middle of an IT block is illegal, so a label or
25340 barrier terminates the block. */
25341 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
25344 body
= PATTERN (insn
);
25345 /* USE and CLOBBER aren't really insns, so just skip them. */
25346 if (GET_CODE (body
) == USE
25347 || GET_CODE (body
) == CLOBBER
)
25350 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25351 if (GET_CODE (body
) != COND_EXEC
)
25353 /* Maximum number of conditionally executed instructions in a block. */
25354 n
= get_attr_ce_count (insn
);
25355 if (arm_condexec_masklen
+ n
> max
)
25358 predicate
= COND_EXEC_TEST (body
);
25359 code
= get_arm_condition_code (predicate
);
25360 mask
= (1 << n
) - 1;
25361 if (arm_current_cc
== code
)
25362 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
25363 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
25366 arm_condexec_count
++;
25367 arm_condexec_masklen
+= n
;
25369 /* A jump must be the last instruction in a conditional block. */
25373 /* Restore recog_data (getting the attributes of other insns can
25374 destroy this array, but final.cc assumes that it remains intact
25375 across this call). */
25376 extract_constrain_insn_cached (first_insn
);
25380 arm_final_prescan_insn (rtx_insn
*insn
)
25382 /* BODY will hold the body of INSN. */
25383 rtx body
= PATTERN (insn
);
25385 /* This will be 1 if trying to repeat the trick, and things need to be
25386 reversed if it appears to fail. */
25389 /* If we start with a return insn, we only succeed if we find another one. */
25390 int seeking_return
= 0;
25391 enum rtx_code return_code
= UNKNOWN
;
25393 /* START_INSN will hold the insn from where we start looking. This is the
25394 first insn after the following code_label if REVERSE is true. */
25395 rtx_insn
*start_insn
= insn
;
25397 /* If in state 4, check if the target branch is reached, in order to
25398 change back to state 0. */
25399 if (arm_ccfsm_state
== 4)
25401 if (insn
== arm_target_insn
)
25403 arm_target_insn
= NULL
;
25404 arm_ccfsm_state
= 0;
25409 /* If in state 3, it is possible to repeat the trick, if this insn is an
25410 unconditional branch to a label, and immediately following this branch
25411 is the previous target label which is only used once, and the label this
25412 branch jumps to is not too far off. */
25413 if (arm_ccfsm_state
== 3)
25415 if (simplejump_p (insn
))
25417 start_insn
= next_nonnote_insn (start_insn
);
25418 if (BARRIER_P (start_insn
))
25420 /* XXX Isn't this always a barrier? */
25421 start_insn
= next_nonnote_insn (start_insn
);
25423 if (LABEL_P (start_insn
)
25424 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
25425 && LABEL_NUSES (start_insn
) == 1)
25430 else if (ANY_RETURN_P (body
))
25432 start_insn
= next_nonnote_insn (start_insn
);
25433 if (BARRIER_P (start_insn
))
25434 start_insn
= next_nonnote_insn (start_insn
);
25435 if (LABEL_P (start_insn
)
25436 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
25437 && LABEL_NUSES (start_insn
) == 1)
25440 seeking_return
= 1;
25441 return_code
= GET_CODE (body
);
25450 gcc_assert (!arm_ccfsm_state
|| reverse
);
25451 if (!JUMP_P (insn
))
25454 /* This jump might be paralleled with a clobber of the condition codes
25455 the jump should always come first */
25456 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
25457 body
= XVECEXP (body
, 0, 0);
25460 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
25461 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
25464 int fail
= FALSE
, succeed
= FALSE
;
25465 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25466 int then_not_else
= TRUE
;
25467 rtx_insn
*this_insn
= start_insn
;
25470 /* Register the insn jumped to. */
25473 if (!seeking_return
)
25474 label
= XEXP (SET_SRC (body
), 0);
25476 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
25477 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
25478 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
25480 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
25481 then_not_else
= FALSE
;
25483 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
25485 seeking_return
= 1;
25486 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
25488 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
25490 seeking_return
= 1;
25491 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
25492 then_not_else
= FALSE
;
25495 gcc_unreachable ();
25497 /* See how many insns this branch skips, and what kind of insns. If all
25498 insns are okay, and the label or unconditional branch to the same
25499 label is not too far away, succeed. */
25500 for (insns_skipped
= 0;
25501 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
25505 this_insn
= next_nonnote_insn (this_insn
);
25509 switch (GET_CODE (this_insn
))
25512 /* Succeed if it is the target label, otherwise fail since
25513 control falls in from somewhere else. */
25514 if (this_insn
== label
)
25516 arm_ccfsm_state
= 1;
25524 /* Succeed if the following insn is the target label.
25526 If return insns are used then the last insn in a function
25527 will be a barrier. */
25528 this_insn
= next_nonnote_insn (this_insn
);
25529 if (this_insn
&& this_insn
== label
)
25531 arm_ccfsm_state
= 1;
25539 /* The AAPCS says that conditional calls should not be
25540 used since they make interworking inefficient (the
25541 linker can't transform BL<cond> into BLX). That's
25542 only a problem if the machine has BLX. */
25549 /* Succeed if the following insn is the target label, or
25550 if the following two insns are a barrier and the
25552 this_insn
= next_nonnote_insn (this_insn
);
25553 if (this_insn
&& BARRIER_P (this_insn
))
25554 this_insn
= next_nonnote_insn (this_insn
);
25556 if (this_insn
&& this_insn
== label
25557 && insns_skipped
< max_insns_skipped
)
25559 arm_ccfsm_state
= 1;
25567 /* If this is an unconditional branch to the same label, succeed.
25568 If it is to another label, do nothing. If it is conditional,
25570 /* XXX Probably, the tests for SET and the PC are
25573 scanbody
= PATTERN (this_insn
);
25574 if (GET_CODE (scanbody
) == SET
25575 && GET_CODE (SET_DEST (scanbody
)) == PC
)
25577 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
25578 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
25580 arm_ccfsm_state
= 2;
25583 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
25586 /* Fail if a conditional return is undesirable (e.g. on a
25587 StrongARM), but still allow this if optimizing for size. */
25588 else if (GET_CODE (scanbody
) == return_code
25589 && !use_return_insn (TRUE
, NULL
)
25592 else if (GET_CODE (scanbody
) == return_code
)
25594 arm_ccfsm_state
= 2;
25597 else if (GET_CODE (scanbody
) == PARALLEL
)
25599 switch (get_attr_conds (this_insn
))
25609 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
25614 /* Instructions using or affecting the condition codes make it
25616 scanbody
= PATTERN (this_insn
);
25617 if (!(GET_CODE (scanbody
) == SET
25618 || GET_CODE (scanbody
) == PARALLEL
)
25619 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
25629 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
25630 arm_target_label
= CODE_LABEL_NUMBER (label
);
25633 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
25635 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
25637 this_insn
= next_nonnote_insn (this_insn
);
25638 gcc_assert (!this_insn
25639 || (!BARRIER_P (this_insn
)
25640 && !LABEL_P (this_insn
)));
25644 /* Oh, dear! we ran off the end.. give up. */
25645 extract_constrain_insn_cached (insn
);
25646 arm_ccfsm_state
= 0;
25647 arm_target_insn
= NULL
;
25650 arm_target_insn
= this_insn
;
25653 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25656 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
25658 if (reverse
|| then_not_else
)
25659 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
25662 /* Restore recog_data (getting the attributes of other insns can
25663 destroy this array, but final.cc assumes that it remains intact
25664 across this call. */
25665 extract_constrain_insn_cached (insn
);
25669 /* Output IT instructions. */
25671 thumb2_asm_output_opcode (FILE * stream
)
25676 if (arm_condexec_mask
)
25678 for (n
= 0; n
< arm_condexec_masklen
; n
++)
25679 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
25681 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
25682 arm_condition_codes
[arm_current_cc
]);
25683 arm_condexec_mask
= 0;
25687 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25688 UNITS_PER_WORD bytes wide. */
25689 static unsigned int
25690 arm_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
25692 if (IS_VPR_REGNUM (regno
))
25693 return CEIL (GET_MODE_SIZE (mode
), 2);
25696 && regno
> PC_REGNUM
25697 && regno
!= FRAME_POINTER_REGNUM
25698 && regno
!= ARG_POINTER_REGNUM
25699 && !IS_VFP_REGNUM (regno
))
25702 return ARM_NUM_REGS (mode
);
25705 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25707 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
25709 if (GET_MODE_CLASS (mode
) == MODE_CC
)
25710 return (regno
== CC_REGNUM
25711 || (TARGET_VFP_BASE
25712 && regno
== VFPCC_REGNUM
));
25714 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
25717 if (IS_VPR_REGNUM (regno
))
25718 return VALID_MVE_PRED_MODE (mode
);
25721 /* For the Thumb we only allow values bigger than SImode in
25722 registers 0 - 6, so that there is always a second low
25723 register available to hold the upper part of the value.
25724 We probably we ought to ensure that the register is the
25725 start of an even numbered register pair. */
25726 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
25728 if (TARGET_VFP_BASE
&& IS_VFP_REGNUM (regno
))
25730 if (mode
== DFmode
|| mode
== DImode
)
25731 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
25733 if (mode
== HFmode
|| mode
== BFmode
|| mode
== HImode
25734 || mode
== SFmode
|| mode
== SImode
)
25735 return VFP_REGNO_OK_FOR_SINGLE (regno
);
25738 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
25739 || (VALID_NEON_QREG_MODE (mode
)
25740 && NEON_REGNO_OK_FOR_QUAD (regno
))
25741 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
25742 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
25743 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
25744 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
25745 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
25746 if (TARGET_HAVE_MVE
)
25747 return ((VALID_MVE_MODE (mode
) && NEON_REGNO_OK_FOR_QUAD (regno
))
25748 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
25749 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8)));
25754 if (TARGET_REALLY_IWMMXT
)
25756 if (IS_IWMMXT_GR_REGNUM (regno
))
25757 return mode
== SImode
;
25759 if (IS_IWMMXT_REGNUM (regno
))
25760 return VALID_IWMMXT_REG_MODE (mode
);
25763 /* We allow almost any value to be stored in the general registers.
25764 Restrict doubleword quantities to even register pairs in ARM state
25765 so that we can use ldrd. The same restriction applies for MVE
25766 in order to support Armv8.1-M Mainline instructions.
25767 Do not allow very large Neon structure opaque modes in general
25768 registers; they would use too many. */
25769 if (regno
<= LAST_ARM_REGNUM
)
25771 if (ARM_NUM_REGS (mode
) > 4)
25774 if (TARGET_THUMB2
&& !(TARGET_HAVE_MVE
|| TARGET_CDE
))
25777 return !((TARGET_LDRD
|| TARGET_CDE
)
25778 && GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
25781 if (regno
== FRAME_POINTER_REGNUM
25782 || regno
== ARG_POINTER_REGNUM
)
25783 /* We only allow integers in the fake hard registers. */
25784 return GET_MODE_CLASS (mode
) == MODE_INT
;
25789 /* Implement TARGET_MODES_TIEABLE_P. */
25792 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
25794 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
25797 if (TARGET_HAVE_MVE
25798 && (VALID_MVE_PRED_MODE (mode1
) && VALID_MVE_PRED_MODE (mode2
)))
25801 /* We specifically want to allow elements of "structure" modes to
25802 be tieable to the structure. This more general condition allows
25803 other rarer situations too. */
25805 && (VALID_NEON_DREG_MODE (mode1
)
25806 || VALID_NEON_QREG_MODE (mode1
)
25807 || VALID_NEON_STRUCT_MODE (mode1
))
25808 && (VALID_NEON_DREG_MODE (mode2
)
25809 || VALID_NEON_QREG_MODE (mode2
)
25810 || VALID_NEON_STRUCT_MODE (mode2
)))
25811 || (TARGET_HAVE_MVE
25812 && (VALID_MVE_MODE (mode1
)
25813 || VALID_MVE_STRUCT_MODE (mode1
))
25814 && (VALID_MVE_MODE (mode2
)
25815 || VALID_MVE_STRUCT_MODE (mode2
))))
25821 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25822 not used in arm mode. */
25825 arm_regno_class (int regno
)
25827 if (regno
== PC_REGNUM
)
25830 if (IS_VPR_REGNUM (regno
))
25833 if (IS_PAC_REGNUM (regno
))
25838 if (regno
== STACK_POINTER_REGNUM
)
25840 if (regno
== CC_REGNUM
)
25847 if (TARGET_THUMB2
&& regno
< 8)
25850 if ( regno
<= LAST_ARM_REGNUM
25851 || regno
== FRAME_POINTER_REGNUM
25852 || regno
== ARG_POINTER_REGNUM
)
25853 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
25855 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
25856 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
25858 if (IS_VFP_REGNUM (regno
))
25860 if (regno
<= D7_VFP_REGNUM
)
25861 return VFP_D0_D7_REGS
;
25862 else if (regno
<= LAST_LO_VFP_REGNUM
)
25863 return VFP_LO_REGS
;
25865 return VFP_HI_REGS
;
25868 if (IS_IWMMXT_REGNUM (regno
))
25869 return IWMMXT_REGS
;
25871 if (IS_IWMMXT_GR_REGNUM (regno
))
25872 return IWMMXT_GR_REGS
;
25877 /* Handle a special case when computing the offset
25878 of an argument from the frame pointer. */
25880 arm_debugger_arg_offset (int value
, rtx addr
)
25884 /* We are only interested if dbxout_parms() failed to compute the offset. */
25888 /* We can only cope with the case where the address is held in a register. */
25892 /* If we are using the frame pointer to point at the argument, then
25893 an offset of 0 is correct. */
25894 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
25897 /* If we are using the stack pointer to point at the
25898 argument, then an offset of 0 is correct. */
25899 /* ??? Check this is consistent with thumb2 frame layout. */
25900 if ((TARGET_THUMB
|| !frame_pointer_needed
)
25901 && REGNO (addr
) == SP_REGNUM
)
25904 /* Oh dear. The argument is pointed to by a register rather
25905 than being held in a register, or being stored at a known
25906 offset from the frame pointer. Since GDB only understands
25907 those two kinds of argument we must translate the address
25908 held in the register into an offset from the frame pointer.
25909 We do this by searching through the insns for the function
25910 looking to see where this register gets its value. If the
25911 register is initialized from the frame pointer plus an offset
25912 then we are in luck and we can continue, otherwise we give up.
25914 This code is exercised by producing debugging information
25915 for a function with arguments like this:
25917 double func (double a, double b, int c, double d) {return d;}
25919 Without this code the stab for parameter 'd' will be set to
25920 an offset of 0 from the frame pointer, rather than 8. */
25922 /* The if() statement says:
25924 If the insn is a normal instruction
25925 and if the insn is setting the value in a register
25926 and if the register being set is the register holding the address of the argument
25927 and if the address is computing by an addition
25928 that involves adding to a register
25929 which is the frame pointer
25934 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
25936 if ( NONJUMP_INSN_P (insn
)
25937 && GET_CODE (PATTERN (insn
)) == SET
25938 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
25939 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
25940 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
25941 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25942 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
25945 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
25954 warning (0, "unable to compute real location of stacked parameter");
25955 value
= 8; /* XXX magic hack */
25961 /* Implement TARGET_PROMOTED_TYPE. */
25964 arm_promoted_type (const_tree t
)
25966 if (SCALAR_FLOAT_TYPE_P (t
)
25967 && TYPE_PRECISION (t
) == 16
25968 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
25969 return float_type_node
;
25973 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25974 This simply adds HFmode as a supported mode; even though we don't
25975 implement arithmetic on this type directly, it's supported by
25976 optabs conversions, much the way the double-word arithmetic is
25977 special-cased in the default hook. */
25980 arm_scalar_mode_supported_p (scalar_mode mode
)
25982 if (mode
== HFmode
)
25983 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
25984 else if (ALL_FIXED_POINT_MODE_P (mode
))
25987 return default_scalar_mode_supported_p (mode
);
25990 /* Set the value of FLT_EVAL_METHOD.
25991 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25993 0: evaluate all operations and constants, whose semantic type has at
25994 most the range and precision of type float, to the range and
25995 precision of float; evaluate all other operations and constants to
25996 the range and precision of the semantic type;
25998 N, where _FloatN is a supported interchange floating type
25999 evaluate all operations and constants, whose semantic type has at
26000 most the range and precision of _FloatN type, to the range and
26001 precision of the _FloatN type; evaluate all other operations and
26002 constants to the range and precision of the semantic type;
26004 If we have the ARMv8.2-A extensions then we support _Float16 in native
26005 precision, so we should set this to 16. Otherwise, we support the type,
26006 but want to evaluate expressions in float precision, so set this to
26009 static enum flt_eval_method
26010 arm_excess_precision (enum excess_precision_type type
)
26014 case EXCESS_PRECISION_TYPE_FAST
:
26015 case EXCESS_PRECISION_TYPE_STANDARD
:
26016 /* We can calculate either in 16-bit range and precision or
26017 32-bit range and precision. Make that decision based on whether
26018 we have native support for the ARMv8.2-A 16-bit floating-point
26019 instructions or not. */
26020 return (TARGET_VFP_FP16INST
26021 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26022 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
26023 case EXCESS_PRECISION_TYPE_IMPLICIT
:
26024 case EXCESS_PRECISION_TYPE_FLOAT16
:
26025 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
26027 gcc_unreachable ();
26029 return FLT_EVAL_METHOD_UNPREDICTABLE
;
26033 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26034 _Float16 if we are using anything other than ieee format for 16-bit
26035 floating point. Otherwise, punt to the default implementation. */
26036 static opt_scalar_float_mode
26037 arm_floatn_mode (int n
, bool extended
)
26039 if (!extended
&& n
== 16)
26041 if (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
)
26043 return opt_scalar_float_mode ();
26046 return default_floatn_mode (n
, extended
);
26050 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26051 not to early-clobber SRC registers in the process.
26053 We assume that the operands described by SRC and DEST represent a
26054 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26055 number of components into which the copy has been decomposed. */
26057 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
26061 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
26062 || REGNO (operands
[0]) < REGNO (operands
[1]))
26064 for (i
= 0; i
< count
; i
++)
26066 operands
[2 * i
] = dest
[i
];
26067 operands
[2 * i
+ 1] = src
[i
];
26072 for (i
= 0; i
< count
; i
++)
26074 operands
[2 * i
] = dest
[count
- i
- 1];
26075 operands
[2 * i
+ 1] = src
[count
- i
- 1];
26080 /* Split operands into moves from op[1] + op[2] into op[0]. */
26083 neon_split_vcombine (rtx operands
[3])
26085 unsigned int dest
= REGNO (operands
[0]);
26086 unsigned int src1
= REGNO (operands
[1]);
26087 unsigned int src2
= REGNO (operands
[2]);
26088 machine_mode halfmode
= GET_MODE (operands
[1]);
26089 unsigned int halfregs
= REG_NREGS (operands
[1]);
26090 rtx destlo
, desthi
;
26092 if (src1
== dest
&& src2
== dest
+ halfregs
)
26094 /* No-op move. Can't split to nothing; emit something. */
26095 emit_note (NOTE_INSN_DELETED
);
26099 /* Preserve register attributes for variable tracking. */
26100 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
26101 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
26102 GET_MODE_SIZE (halfmode
));
26104 /* Special case of reversed high/low parts. Use VSWP. */
26105 if (src2
== dest
&& src1
== dest
+ halfregs
)
26107 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
26108 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
26109 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
26113 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
26115 /* Try to avoid unnecessary moves if part of the result
26116 is in the right place already. */
26118 emit_move_insn (destlo
, operands
[1]);
26119 if (src2
!= dest
+ halfregs
)
26120 emit_move_insn (desthi
, operands
[2]);
26124 if (src2
!= dest
+ halfregs
)
26125 emit_move_insn (desthi
, operands
[2]);
26127 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
26140 /* Like emit_multi_reg_push, but allowing for a different set of
26141 registers to be described as saved. MASK is the set of registers
26142 to be saved; REAL_REGS is the set of registers to be described as
26143 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26146 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
26148 unsigned long regno
;
26149 rtx par
[10], tmp
, reg
;
26153 /* Build the parallel of the registers actually being stored. */
26154 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
26156 regno
= ctz_hwi (mask
);
26157 reg
= gen_rtx_REG (SImode
, regno
);
26160 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
26162 tmp
= gen_rtx_USE (VOIDmode
, reg
);
26167 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26168 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
26169 tmp
= gen_frame_mem (BLKmode
, tmp
);
26170 tmp
= gen_rtx_SET (tmp
, par
[0]);
26173 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
26174 insn
= emit_insn (tmp
);
26176 /* Always build the stack adjustment note for unwind info. */
26177 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26178 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
26181 /* Build the parallel of the registers recorded as saved for unwind. */
26182 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
26184 regno
= ctz_hwi (real_regs
);
26185 reg
= gen_rtx_REG (SImode
, regno
);
26187 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
26188 tmp
= gen_frame_mem (SImode
, tmp
);
26189 tmp
= gen_rtx_SET (tmp
, reg
);
26190 RTX_FRAME_RELATED_P (tmp
) = 1;
26198 RTX_FRAME_RELATED_P (par
[0]) = 1;
26199 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
26202 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
26207 /* Emit code to push or pop registers to or from the stack. F is the
26208 assembly file. MASK is the registers to pop. */
26210 thumb_pop (FILE *f
, unsigned long mask
)
26213 int lo_mask
= mask
& 0xFF;
26217 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
26219 /* Special case. Do not generate a POP PC statement here, do it in
26221 thumb_exit (f
, -1);
26225 fprintf (f
, "\tpop\t{");
26227 /* Look at the low registers first. */
26228 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
26232 asm_fprintf (f
, "%r", regno
);
26234 if ((lo_mask
& ~1) != 0)
26239 if (mask
& (1 << PC_REGNUM
))
26241 /* Catch popping the PC. */
26242 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
26243 || IS_CMSE_ENTRY (arm_current_func_type ()))
26245 /* The PC is never poped directly, instead
26246 it is popped into r3 and then BX is used. */
26247 fprintf (f
, "}\n");
26249 thumb_exit (f
, -1);
26258 asm_fprintf (f
, "%r", PC_REGNUM
);
26262 fprintf (f
, "}\n");
26265 /* Generate code to return from a thumb function.
26266 If 'reg_containing_return_addr' is -1, then the return address is
26267 actually on the stack, at the stack pointer.
26269 Note: do not forget to update length attribute of corresponding insn pattern
26270 when changing assembly output (eg. length attribute of epilogue_insns when
26271 updating Armv8-M Baseline Security Extensions register clearing
26274 thumb_exit (FILE *f
, int reg_containing_return_addr
)
26276 unsigned regs_available_for_popping
;
26277 unsigned regs_to_pop
;
26279 unsigned available
;
26283 int restore_a4
= FALSE
;
26285 /* Compute the registers we need to pop. */
26289 if (reg_containing_return_addr
== -1)
26291 regs_to_pop
|= 1 << LR_REGNUM
;
26295 if (TARGET_BACKTRACE
)
26297 /* Restore the (ARM) frame pointer and stack pointer. */
26298 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
26302 /* If there is nothing to pop then just emit the BX instruction and
26304 if (pops_needed
== 0)
26306 if (crtl
->calls_eh_return
)
26307 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26309 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26311 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26312 emitted by cmse_nonsecure_entry_clear_before_return (). */
26313 if (!TARGET_HAVE_FPCXT_CMSE
)
26314 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
26315 reg_containing_return_addr
);
26316 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
26319 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26322 /* Otherwise if we are not supporting interworking and we have not created
26323 a backtrace structure and the function was not entered in ARM mode then
26324 just pop the return address straight into the PC. */
26325 else if (!TARGET_INTERWORK
26326 && !TARGET_BACKTRACE
26327 && !is_called_in_ARM_mode (current_function_decl
)
26328 && !crtl
->calls_eh_return
26329 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26331 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
26335 /* Find out how many of the (return) argument registers we can corrupt. */
26336 regs_available_for_popping
= 0;
26338 /* If returning via __builtin_eh_return, the bottom three registers
26339 all contain information needed for the return. */
26340 if (crtl
->calls_eh_return
)
26344 /* If we can deduce the registers used from the function's
26345 return value. This is more reliable that examining
26346 df_regs_ever_live_p () because that will be set if the register is
26347 ever used in the function, not just if the register is used
26348 to hold a return value. */
26350 if (crtl
->return_rtx
!= 0)
26351 mode
= GET_MODE (crtl
->return_rtx
);
26353 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
26355 size
= GET_MODE_SIZE (mode
);
26359 /* In a void function we can use any argument register.
26360 In a function that returns a structure on the stack
26361 we can use the second and third argument registers. */
26362 if (mode
== VOIDmode
)
26363 regs_available_for_popping
=
26364 (1 << ARG_REGISTER (1))
26365 | (1 << ARG_REGISTER (2))
26366 | (1 << ARG_REGISTER (3));
26368 regs_available_for_popping
=
26369 (1 << ARG_REGISTER (2))
26370 | (1 << ARG_REGISTER (3));
26372 else if (size
<= 4)
26373 regs_available_for_popping
=
26374 (1 << ARG_REGISTER (2))
26375 | (1 << ARG_REGISTER (3));
26376 else if (size
<= 8)
26377 regs_available_for_popping
=
26378 (1 << ARG_REGISTER (3));
26381 /* Match registers to be popped with registers into which we pop them. */
26382 for (available
= regs_available_for_popping
,
26383 required
= regs_to_pop
;
26384 required
!= 0 && available
!= 0;
26385 available
&= ~(available
& - available
),
26386 required
&= ~(required
& - required
))
26389 /* If we have any popping registers left over, remove them. */
26391 regs_available_for_popping
&= ~available
;
26393 /* Otherwise if we need another popping register we can use
26394 the fourth argument register. */
26395 else if (pops_needed
)
26397 /* If we have not found any free argument registers and
26398 reg a4 contains the return address, we must move it. */
26399 if (regs_available_for_popping
== 0
26400 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
26402 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26403 reg_containing_return_addr
= LR_REGNUM
;
26405 else if (size
> 12)
26407 /* Register a4 is being used to hold part of the return value,
26408 but we have dire need of a free, low register. */
26411 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26414 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26416 /* The fourth argument register is available. */
26417 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26423 /* Pop as many registers as we can. */
26424 thumb_pop (f
, regs_available_for_popping
);
26426 /* Process the registers we popped. */
26427 if (reg_containing_return_addr
== -1)
26429 /* The return address was popped into the lowest numbered register. */
26430 regs_to_pop
&= ~(1 << LR_REGNUM
);
26432 reg_containing_return_addr
=
26433 number_of_first_bit_set (regs_available_for_popping
);
26435 /* Remove this register for the mask of available registers, so that
26436 the return address will not be corrupted by further pops. */
26437 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26440 /* If we popped other registers then handle them here. */
26441 if (regs_available_for_popping
)
26445 /* Work out which register currently contains the frame pointer. */
26446 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26448 /* Move it into the correct place. */
26449 asm_fprintf (f
, "\tmov\t%r, %r\n",
26450 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26452 /* (Temporarily) remove it from the mask of popped registers. */
26453 regs_available_for_popping
&= ~(1 << frame_pointer
);
26454 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26456 if (regs_available_for_popping
)
26460 /* We popped the stack pointer as well,
26461 find the register that contains it. */
26462 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26464 /* Move it into the stack register. */
26465 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26467 /* At this point we have popped all necessary registers, so
26468 do not worry about restoring regs_available_for_popping
26469 to its correct value:
26471 assert (pops_needed == 0)
26472 assert (regs_available_for_popping == (1 << frame_pointer))
26473 assert (regs_to_pop == (1 << STACK_POINTER)) */
26477 /* Since we have just move the popped value into the frame
26478 pointer, the popping register is available for reuse, and
26479 we know that we still have the stack pointer left to pop. */
26480 regs_available_for_popping
|= (1 << frame_pointer
);
26484 /* If we still have registers left on the stack, but we no longer have
26485 any registers into which we can pop them, then we must move the return
26486 address into the link register and make available the register that
26488 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26490 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26492 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26493 reg_containing_return_addr
);
26495 reg_containing_return_addr
= LR_REGNUM
;
26498 /* If we have registers left on the stack then pop some more.
26499 We know that at most we will want to pop FP and SP. */
26500 if (pops_needed
> 0)
26505 thumb_pop (f
, regs_available_for_popping
);
26507 /* We have popped either FP or SP.
26508 Move whichever one it is into the correct register. */
26509 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26510 move_to
= number_of_first_bit_set (regs_to_pop
);
26512 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26516 /* If we still have not popped everything then we must have only
26517 had one register available to us and we are now popping the SP. */
26518 if (pops_needed
> 0)
26522 thumb_pop (f
, regs_available_for_popping
);
26524 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26526 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26528 assert (regs_to_pop == (1 << STACK_POINTER))
26529 assert (pops_needed == 1)
26533 /* If necessary restore the a4 register. */
26536 if (reg_containing_return_addr
!= LR_REGNUM
)
26538 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26539 reg_containing_return_addr
= LR_REGNUM
;
26542 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26545 if (crtl
->calls_eh_return
)
26546 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26548 /* Return to caller. */
26549 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26551 /* This is for the cases where LR is not being used to contain the return
26552 address. It may therefore contain information that we might not want
26553 to leak, hence it must be cleared. The value in R0 will never be a
26554 secret at this point, so it is safe to use it, see the clearing code
26555 in cmse_nonsecure_entry_clear_before_return (). */
26556 if (reg_containing_return_addr
!= LR_REGNUM
)
26557 asm_fprintf (f
, "\tmov\tlr, r0\n");
26559 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26560 by cmse_nonsecure_entry_clear_before_return (). */
26561 if (!TARGET_HAVE_FPCXT_CMSE
)
26562 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
26563 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
26566 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26569 /* Scan INSN just before assembler is output for it.
26570 For Thumb-1, we track the status of the condition codes; this
26571 information is used in the cbranchsi4_insn pattern. */
26573 thumb1_final_prescan_insn (rtx_insn
*insn
)
26575 if (flag_print_asm_name
)
26576 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26577 INSN_ADDRESSES (INSN_UID (insn
)));
26578 /* Don't overwrite the previous setter when we get to a cbranch. */
26579 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26581 enum attr_conds conds
;
26583 if (cfun
->machine
->thumb1_cc_insn
)
26585 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26586 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26589 conds
= get_attr_conds (insn
);
26590 if (conds
== CONDS_SET
)
26592 rtx set
= single_set (insn
);
26593 cfun
->machine
->thumb1_cc_insn
= insn
;
26594 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26595 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26596 cfun
->machine
->thumb1_cc_mode
= CC_NZmode
;
26597 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
26599 rtx src1
= XEXP (SET_SRC (set
), 1);
26600 if (src1
== const0_rtx
)
26601 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26603 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26605 /* Record the src register operand instead of dest because
26606 cprop_hardreg pass propagates src. */
26607 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26610 else if (conds
!= CONDS_NOCOND
)
26611 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26614 /* Check if unexpected far jump is used. */
26615 if (cfun
->machine
->lr_save_eliminated
26616 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26617 internal_error("Unexpected thumb1 far jump");
26621 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26623 unsigned HOST_WIDE_INT mask
= 0xff;
26626 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26627 if (val
== 0) /* XXX */
26630 for (i
= 0; i
< 25; i
++)
26631 if ((val
& (mask
<< i
)) == val
)
26637 /* Returns nonzero if the current function contains,
26638 or might contain a far jump. */
26640 thumb_far_jump_used_p (void)
26643 bool far_jump
= false;
26644 unsigned int func_size
= 0;
26646 /* If we have already decided that far jumps may be used,
26647 do not bother checking again, and always return true even if
26648 it turns out that they are not being used. Once we have made
26649 the decision that far jumps are present (and that hence the link
26650 register will be pushed onto the stack) we cannot go back on it. */
26651 if (cfun
->machine
->far_jump_used
)
26654 /* If this function is not being called from the prologue/epilogue
26655 generation code then it must be being called from the
26656 INITIAL_ELIMINATION_OFFSET macro. */
26657 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26659 /* In this case we know that we are being asked about the elimination
26660 of the arg pointer register. If that register is not being used,
26661 then there are no arguments on the stack, and we do not have to
26662 worry that a far jump might force the prologue to push the link
26663 register, changing the stack offsets. In this case we can just
26664 return false, since the presence of far jumps in the function will
26665 not affect stack offsets.
26667 If the arg pointer is live (or if it was live, but has now been
26668 eliminated and so set to dead) then we do have to test to see if
26669 the function might contain a far jump. This test can lead to some
26670 false negatives, since before reload is completed, then length of
26671 branch instructions is not known, so gcc defaults to returning their
26672 longest length, which in turn sets the far jump attribute to true.
26674 A false negative will not result in bad code being generated, but it
26675 will result in a needless push and pop of the link register. We
26676 hope that this does not occur too often.
26678 If we need doubleword stack alignment this could affect the other
26679 elimination offsets so we can't risk getting it wrong. */
26680 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26681 cfun
->machine
->arg_pointer_live
= 1;
26682 else if (!cfun
->machine
->arg_pointer_live
)
26686 /* We should not change far_jump_used during or after reload, as there is
26687 no chance to change stack frame layout. */
26688 if (reload_in_progress
|| reload_completed
)
26691 /* Check to see if the function contains a branch
26692 insn with the far jump attribute set. */
26693 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26695 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26699 func_size
+= get_attr_length (insn
);
26702 /* Attribute far_jump will always be true for thumb1 before
26703 shorten_branch pass. So checking far_jump attribute before
26704 shorten_branch isn't much useful.
26706 Following heuristic tries to estimate more accurately if a far jump
26707 may finally be used. The heuristic is very conservative as there is
26708 no chance to roll-back the decision of not to use far jump.
26710 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26711 2-byte insn is associated with a 4 byte constant pool. Using
26712 function size 2048/3 as the threshold is conservative enough. */
26715 if ((func_size
* 3) >= 2048)
26717 /* Record the fact that we have decided that
26718 the function does use far jumps. */
26719 cfun
->machine
->far_jump_used
= 1;
26727 /* Return nonzero if FUNC must be entered in ARM mode. */
26729 is_called_in_ARM_mode (tree func
)
26731 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26733 /* Ignore the problem about functions whose address is taken. */
26734 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26738 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26744 /* Given the stack offsets and register mask in OFFSETS, decide how
26745 many additional registers to push instead of subtracting a constant
26746 from SP. For epilogues the principle is the same except we use pop.
26747 FOR_PROLOGUE indicates which we're generating. */
26749 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26751 HOST_WIDE_INT amount
;
26752 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26753 /* Extract a mask of the ones we can give to the Thumb's push/pop
26755 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26756 /* Then count how many other high registers will need to be pushed. */
26757 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26758 int n_free
, reg_base
, size
;
26760 if (!for_prologue
&& frame_pointer_needed
)
26761 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26763 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26765 /* If the stack frame size is 512 exactly, we can save one load
26766 instruction, which should make this a win even when optimizing
26768 if (!optimize_size
&& amount
!= 512)
26771 /* Can't do this if there are high registers to push. */
26772 if (high_regs_pushed
!= 0)
26775 /* Shouldn't do it in the prologue if no registers would normally
26776 be pushed at all. In the epilogue, also allow it if we'll have
26777 a pop insn for the PC. */
26780 || TARGET_BACKTRACE
26781 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26782 || TARGET_INTERWORK
26783 || crtl
->args
.pretend_args_size
!= 0))
26786 /* Don't do this if thumb_expand_prologue wants to emit instructions
26787 between the push and the stack frame allocation. */
26789 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26790 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26797 size
= arm_size_return_regs ();
26798 reg_base
= ARM_NUM_INTS (size
);
26799 live_regs_mask
>>= reg_base
;
26802 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26803 && (for_prologue
|| call_used_or_fixed_reg_p (reg_base
+ n_free
)))
26805 live_regs_mask
>>= 1;
26811 gcc_assert (amount
/ 4 * 4 == amount
);
26813 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26814 return (amount
- 508) / 4;
26815 if (amount
<= n_free
* 4)
26820 /* The bits which aren't usefully expanded as rtl. */
26822 thumb1_unexpanded_epilogue (void)
26824 arm_stack_offsets
*offsets
;
26826 unsigned long live_regs_mask
= 0;
26827 int high_regs_pushed
= 0;
26829 int had_to_push_lr
;
26832 if (cfun
->machine
->return_used_this_function
!= 0)
26835 if (IS_NAKED (arm_current_func_type ()))
26838 offsets
= arm_get_frame_offsets ();
26839 live_regs_mask
= offsets
->saved_regs_mask
;
26840 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26842 /* If we can deduce the registers used from the function's return value.
26843 This is more reliable that examining df_regs_ever_live_p () because that
26844 will be set if the register is ever used in the function, not just if
26845 the register is used to hold a return value. */
26846 size
= arm_size_return_regs ();
26848 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26851 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26852 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26855 /* The prolog may have pushed some high registers to use as
26856 work registers. e.g. the testsuite file:
26857 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26858 compiles to produce:
26859 push {r4, r5, r6, r7, lr}
26863 as part of the prolog. We have to undo that pushing here. */
26865 if (high_regs_pushed
)
26867 unsigned long mask
= live_regs_mask
& 0xff;
26870 mask
|= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26873 /* Oh dear! We have no low registers into which we can pop
26876 ("no low registers available for popping high registers");
26878 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
26879 if (live_regs_mask
& (1 << next_hi_reg
))
26882 while (high_regs_pushed
)
26884 /* Find lo register(s) into which the high register(s) can
26886 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26888 if (mask
& (1 << regno
))
26889 high_regs_pushed
--;
26890 if (high_regs_pushed
== 0)
26894 if (high_regs_pushed
== 0 && regno
>= 0)
26895 mask
&= ~((1 << regno
) - 1);
26897 /* Pop the values into the low register(s). */
26898 thumb_pop (asm_out_file
, mask
);
26900 /* Move the value(s) into the high registers. */
26901 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26903 if (mask
& (1 << regno
))
26905 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26908 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26910 if (live_regs_mask
& (1 << next_hi_reg
))
26915 live_regs_mask
&= ~0x0f00;
26918 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26919 live_regs_mask
&= 0xff;
26921 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26923 /* Pop the return address into the PC. */
26924 if (had_to_push_lr
)
26925 live_regs_mask
|= 1 << PC_REGNUM
;
26927 /* Either no argument registers were pushed or a backtrace
26928 structure was created which includes an adjusted stack
26929 pointer, so just pop everything. */
26930 if (live_regs_mask
)
26931 thumb_pop (asm_out_file
, live_regs_mask
);
26933 /* We have either just popped the return address into the
26934 PC or it is was kept in LR for the entire function.
26935 Note that thumb_pop has already called thumb_exit if the
26936 PC was in the list. */
26937 if (!had_to_push_lr
)
26938 thumb_exit (asm_out_file
, LR_REGNUM
);
26942 /* Pop everything but the return address. */
26943 if (live_regs_mask
)
26944 thumb_pop (asm_out_file
, live_regs_mask
);
26946 if (had_to_push_lr
)
26950 /* We have no free low regs, so save one. */
26951 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26955 /* Get the return address into a temporary register. */
26956 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26960 /* Move the return address to lr. */
26961 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26963 /* Restore the low register. */
26964 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26969 regno
= LAST_ARG_REGNUM
;
26974 /* Remove the argument registers that were pushed onto the stack. */
26975 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26976 SP_REGNUM
, SP_REGNUM
,
26977 crtl
->args
.pretend_args_size
);
26979 thumb_exit (asm_out_file
, regno
);
26985 /* Functions to save and restore machine-specific function data. */
26986 static struct machine_function
*
26987 arm_init_machine_status (void)
26989 struct machine_function
*machine
;
26990 machine
= ggc_cleared_alloc
<machine_function
> ();
26992 #if ARM_FT_UNKNOWN != 0
26993 machine
->func_type
= ARM_FT_UNKNOWN
;
26995 machine
->static_chain_stack_bytes
= -1;
26996 machine
->pacspval_needed
= 0;
27000 /* Return an RTX indicating where the return address to the
27001 calling function can be found. */
27003 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
27008 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
27011 /* Do anything needed before RTL is emitted for each function. */
27013 arm_init_expanders (void)
27015 /* Arrange to initialize and mark the machine per-function status. */
27016 init_machine_status
= arm_init_machine_status
;
27018 /* This is to stop the combine pass optimizing away the alignment
27019 adjustment of va_arg. */
27020 /* ??? It is claimed that this should not be necessary. */
27022 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
27025 /* Check that FUNC is called with a different mode. */
27028 arm_change_mode_p (tree func
)
27030 if (TREE_CODE (func
) != FUNCTION_DECL
)
27033 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
27036 callee_tree
= target_option_default_node
;
27038 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
27039 int flags
= callee_opts
->x_target_flags
;
27041 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
27044 /* Like arm_compute_initial_elimination offset. Simpler because there
27045 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27046 to point at the base of the local variables after static stack
27047 space for a function has been allocated. */
27050 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
27052 arm_stack_offsets
*offsets
;
27054 offsets
= arm_get_frame_offsets ();
27058 case ARG_POINTER_REGNUM
:
27061 case STACK_POINTER_REGNUM
:
27062 return offsets
->outgoing_args
- offsets
->saved_args
;
27064 case FRAME_POINTER_REGNUM
:
27065 return offsets
->soft_frame
- offsets
->saved_args
;
27067 case ARM_HARD_FRAME_POINTER_REGNUM
:
27068 return offsets
->saved_regs
- offsets
->saved_args
;
27070 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27071 return offsets
->locals_base
- offsets
->saved_args
;
27074 gcc_unreachable ();
27078 case FRAME_POINTER_REGNUM
:
27081 case STACK_POINTER_REGNUM
:
27082 return offsets
->outgoing_args
- offsets
->soft_frame
;
27084 case ARM_HARD_FRAME_POINTER_REGNUM
:
27085 return offsets
->saved_regs
- offsets
->soft_frame
;
27087 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27088 return offsets
->locals_base
- offsets
->soft_frame
;
27091 gcc_unreachable ();
27096 gcc_unreachable ();
27100 /* Generate the function's prologue. */
27103 thumb1_expand_prologue (void)
27107 HOST_WIDE_INT amount
;
27108 HOST_WIDE_INT size
;
27109 arm_stack_offsets
*offsets
;
27110 unsigned long func_type
;
27112 unsigned long live_regs_mask
;
27113 unsigned long l_mask
;
27114 unsigned high_regs_pushed
= 0;
27115 bool lr_needs_saving
;
27117 func_type
= arm_current_func_type ();
27119 /* Naked functions don't have prologues. */
27120 if (IS_NAKED (func_type
))
27122 if (flag_stack_usage_info
)
27123 current_function_static_stack_size
= 0;
27127 if (IS_INTERRUPT (func_type
))
27129 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27133 if (is_called_in_ARM_mode (current_function_decl
))
27134 emit_insn (gen_prologue_thumb1_interwork ());
27136 offsets
= arm_get_frame_offsets ();
27137 live_regs_mask
= offsets
->saved_regs_mask
;
27138 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
27140 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27141 l_mask
= live_regs_mask
& 0x40ff;
27142 /* Then count how many other high registers will need to be pushed. */
27143 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27145 if (crtl
->args
.pretend_args_size
)
27147 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27149 if (cfun
->machine
->uses_anonymous_args
)
27151 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27152 unsigned long mask
;
27154 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27155 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27157 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27161 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27162 stack_pointer_rtx
, x
));
27164 RTX_FRAME_RELATED_P (insn
) = 1;
27167 if (TARGET_BACKTRACE
)
27169 HOST_WIDE_INT offset
= 0;
27170 unsigned work_register
;
27171 rtx work_reg
, x
, arm_hfp_rtx
;
27173 /* We have been asked to create a stack backtrace structure.
27174 The code looks like this:
27178 0 sub SP, #16 Reserve space for 4 registers.
27179 2 push {R7} Push low registers.
27180 4 add R7, SP, #20 Get the stack pointer before the push.
27181 6 str R7, [SP, #8] Store the stack pointer
27182 (before reserving the space).
27183 8 mov R7, PC Get hold of the start of this code + 12.
27184 10 str R7, [SP, #16] Store it.
27185 12 mov R7, FP Get hold of the current frame pointer.
27186 14 str R7, [SP, #4] Store it.
27187 16 mov R7, LR Get hold of the current return address.
27188 18 str R7, [SP, #12] Store it.
27189 20 add R7, SP, #16 Point at the start of the
27190 backtrace structure.
27191 22 mov FP, R7 Put this value into the frame pointer. */
27193 work_register
= thumb_find_work_register (live_regs_mask
);
27194 work_reg
= gen_rtx_REG (SImode
, work_register
);
27195 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27197 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27198 stack_pointer_rtx
, GEN_INT (-16)));
27199 RTX_FRAME_RELATED_P (insn
) = 1;
27203 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27204 RTX_FRAME_RELATED_P (insn
) = 1;
27205 lr_needs_saving
= false;
27207 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27210 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27211 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27213 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27214 x
= gen_frame_mem (SImode
, x
);
27215 emit_move_insn (x
, work_reg
);
27217 /* Make sure that the instruction fetching the PC is in the right place
27218 to calculate "start of backtrace creation code + 12". */
27219 /* ??? The stores using the common WORK_REG ought to be enough to
27220 prevent the scheduler from doing anything weird. Failing that
27221 we could always move all of the following into an UNSPEC_VOLATILE. */
27224 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27225 emit_move_insn (work_reg
, x
);
27227 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27228 x
= gen_frame_mem (SImode
, x
);
27229 emit_move_insn (x
, work_reg
);
27231 emit_move_insn (work_reg
, arm_hfp_rtx
);
27233 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27234 x
= gen_frame_mem (SImode
, x
);
27235 emit_move_insn (x
, work_reg
);
27239 emit_move_insn (work_reg
, arm_hfp_rtx
);
27241 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27242 x
= gen_frame_mem (SImode
, x
);
27243 emit_move_insn (x
, work_reg
);
27245 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27246 emit_move_insn (work_reg
, x
);
27248 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27249 x
= gen_frame_mem (SImode
, x
);
27250 emit_move_insn (x
, work_reg
);
27253 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27254 emit_move_insn (work_reg
, x
);
27256 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27257 x
= gen_frame_mem (SImode
, x
);
27258 emit_move_insn (x
, work_reg
);
27260 x
= GEN_INT (offset
+ 12);
27261 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27263 emit_move_insn (arm_hfp_rtx
, work_reg
);
27265 /* Optimization: If we are not pushing any low registers but we are going
27266 to push some high registers then delay our first push. This will just
27267 be a push of LR and we can combine it with the push of the first high
27269 else if ((l_mask
& 0xff) != 0
27270 || (high_regs_pushed
== 0 && lr_needs_saving
))
27272 unsigned long mask
= l_mask
;
27273 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27274 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27275 RTX_FRAME_RELATED_P (insn
) = 1;
27276 lr_needs_saving
= false;
27279 if (high_regs_pushed
)
27281 unsigned pushable_regs
;
27282 unsigned next_hi_reg
;
27283 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27284 : crtl
->args
.info
.nregs
;
27285 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27287 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27288 if (live_regs_mask
& (1 << next_hi_reg
))
27291 /* Here we need to mask out registers used for passing arguments
27292 even if they can be pushed. This is to avoid using them to
27293 stash the high registers. Such kind of stash may clobber the
27294 use of arguments. */
27295 pushable_regs
= l_mask
& (~arg_regs_mask
);
27296 pushable_regs
|= thumb1_prologue_unused_call_clobbered_lo_regs ();
27298 /* Normally, LR can be used as a scratch register once it has been
27299 saved; but if the function examines its own return address then
27300 the value is still live and we need to avoid using it. */
27301 bool return_addr_live
27302 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
27305 if (lr_needs_saving
|| return_addr_live
)
27306 pushable_regs
&= ~(1 << LR_REGNUM
);
27308 if (pushable_regs
== 0)
27309 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27311 while (high_regs_pushed
> 0)
27313 unsigned long real_regs_mask
= 0;
27314 unsigned long push_mask
= 0;
27316 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
27318 if (pushable_regs
& (1 << regno
))
27320 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27321 gen_rtx_REG (SImode
, next_hi_reg
));
27323 high_regs_pushed
--;
27324 real_regs_mask
|= (1 << next_hi_reg
);
27325 push_mask
|= (1 << regno
);
27327 if (high_regs_pushed
)
27329 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27331 if (live_regs_mask
& (1 << next_hi_reg
))
27339 /* If we had to find a work register and we have not yet
27340 saved the LR then add it to the list of regs to push. */
27341 if (lr_needs_saving
)
27343 push_mask
|= 1 << LR_REGNUM
;
27344 real_regs_mask
|= 1 << LR_REGNUM
;
27345 lr_needs_saving
= false;
27346 /* If the return address is not live at this point, we
27347 can add LR to the list of registers that we can use
27349 if (!return_addr_live
)
27350 pushable_regs
|= 1 << LR_REGNUM
;
27353 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
27354 RTX_FRAME_RELATED_P (insn
) = 1;
27358 /* Load the pic register before setting the frame pointer,
27359 so we can use r7 as a temporary work register. */
27360 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27361 arm_load_pic_register (live_regs_mask
, NULL_RTX
);
27363 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27364 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27365 stack_pointer_rtx
);
27367 size
= offsets
->outgoing_args
- offsets
->saved_args
;
27368 if (flag_stack_usage_info
)
27369 current_function_static_stack_size
= size
;
27371 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27372 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27373 || flag_stack_clash_protection
)
27375 sorry ("%<-fstack-check=specific%> for Thumb-1");
27377 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27378 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27383 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27384 GEN_INT (- amount
)));
27385 RTX_FRAME_RELATED_P (insn
) = 1;
27391 /* The stack decrement is too big for an immediate value in a single
27392 insn. In theory we could issue multiple subtracts, but after
27393 three of them it becomes more space efficient to place the full
27394 value in the constant pool and load into a register. (Also the
27395 ARM debugger really likes to see only one stack decrement per
27396 function). So instead we look for a scratch register into which
27397 we can load the decrement, and then we subtract this from the
27398 stack pointer. Unfortunately on the thumb the only available
27399 scratch registers are the argument registers, and we cannot use
27400 these as they may hold arguments to the function. Instead we
27401 attempt to locate a call preserved register which is used by this
27402 function. If we can find one, then we know that it will have
27403 been pushed at the start of the prologue and so we can corrupt
27405 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27406 if (live_regs_mask
& (1 << regno
))
27409 gcc_assert(regno
<= LAST_LO_REGNUM
);
27411 reg
= gen_rtx_REG (SImode
, regno
);
27413 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27415 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27416 stack_pointer_rtx
, reg
));
27418 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
27419 plus_constant (Pmode
, stack_pointer_rtx
,
27421 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27422 RTX_FRAME_RELATED_P (insn
) = 1;
27426 if (frame_pointer_needed
)
27427 thumb_set_frame_pointer (offsets
);
27429 /* If we are profiling, make sure no instructions are scheduled before
27430 the call to mcount. Similarly if the user has requested no
27431 scheduling in the prolog. Similarly if we want non-call exceptions
27432 using the EABI unwinder, to prevent faulting instructions from being
27433 swapped with a stack adjustment. */
27434 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27435 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27436 && cfun
->can_throw_non_call_exceptions
))
27437 emit_insn (gen_blockage ());
27439 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27440 if (live_regs_mask
& 0xff)
27441 cfun
->machine
->lr_save_eliminated
= 0;
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; see upstream gcc/config/arm/arm.cc
   for the authoritative text.

   cmse_nonsecure_entry_clear_before_return: before returning from an
   Armv8-M Security Extensions (CMSE) nonsecure-entry function, build a
   bitmap of caller-saved core and (when hard-float/FPCXT is in use) VFP
   registers that could leak secure state, remove from it the registers
   that carry the return value (via compute_not_to_clear_mask) and the
   scratch regs needed by output_return_instruction, then emit the
   clearing sequence through cmse_clear_registers.  Padding bits of a
   composite return value in r0 are cleared separately via
   padding_bits_to_clear.  */
27444 /* Clear caller saved registers not used to pass return values and leaked
27445 condition flags before exiting a cmse_nonsecure_entry function. */
27448 cmse_nonsecure_entry_clear_before_return (void)
27450 bool clear_vfpregs
= TARGET_HARD_FLOAT
|| TARGET_HAVE_FPCXT_CMSE
;
27451 int regno
, maxregno
= clear_vfpregs
? LAST_VFP_REGNUM
: IP_REGNUM
;
27452 uint32_t padding_bits_to_clear
= 0;
27453 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
27454 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
27457 bitmap_clear (to_clear_bitmap
);
27458 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
27459 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
27461 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27465 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
27467 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
27469 if (!TARGET_HAVE_FPCXT_CMSE
)
27471 /* Make sure we don't clear the two scratch registers used to clear
27472 the relevant FPSCR bits in output_return_instruction. */
27473 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
27474 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
27475 emit_use (gen_rtx_REG (SImode
, 4));
27476 bitmap_clear_bit (to_clear_bitmap
, 4);
27480 /* If the user has defined registers to be caller saved, these are no longer
27481 restored by the function before returning and must thus be cleared for
27482 security purposes. */
27483 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
27485 /* We do not touch registers that can be used to pass arguments as per
27486 the AAPCS, since these should never be made callee-saved by user
27488 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
27490 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
27492 if (!callee_saved_reg_p (regno
)
27493 && (!IN_RANGE (regno
, FIRST_VFP_REGNUM
, LAST_VFP_REGNUM
)
27494 || TARGET_HARD_FLOAT
))
27495 bitmap_set_bit (to_clear_bitmap
, regno
);
27498 /* Make sure we do not clear the registers used to return the result in. */
27499 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
27500 if (!VOID_TYPE_P (result_type
))
27502 uint64_t to_clear_return_mask
;
27503 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
27505 /* No need to check that we return in registers, because we don't
27506 support returning on stack yet. */
27507 gcc_assert (REG_P (result_rtl
));
27508 to_clear_return_mask
27509 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
27510 &padding_bits_to_clear
);
27511 if (to_clear_return_mask
)
27513 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
27514 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
27516 if (to_clear_return_mask
& (1ULL << regno
))
27517 bitmap_clear_bit (to_clear_bitmap
, regno
);
27522 if (padding_bits_to_clear
!= 0)
27524 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
27525 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
27527 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27528 returning a composite type, which only uses r0. Let's make sure that
27529 r1-r3 is cleared too. */
27530 bitmap_clear (to_clear_arg_regs_bitmap
);
27531 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
27532 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
27535 /* Clear full registers that leak before returning. */
27536 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
27537 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
27538 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   thumb2_expand_return: expand a Thumb-2 function return.  Counts the
   saved registers; when a real (non-simple) return with saved registers
   is needed it either (a) pops with LR restored then authenticates
   (aut_nop) and simple-returns when PAC is enabled, (b) emits a single
   post-increment PC load for the one-register case, or (c) replaces LR
   with PC in the pop mask and emits one multi-register pop.  For the
   simple-return path, CMSE entry functions first get their leaking
   registers cleared.  */
27542 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27543 POP instruction can be generated. LR should be replaced by PC. All
27544 the checks required are already done by USE_RETURN_INSN (). Hence,
27545 all we really need to check here is if single register is to be
27546 returned, or multiple register return. */
27548 thumb2_expand_return (bool simple_return
)
27551 unsigned long saved_regs_mask
;
27552 arm_stack_offsets
*offsets
;
27554 offsets
= arm_get_frame_offsets ();
27555 saved_regs_mask
= offsets
->saved_regs_mask
;
27557 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27558 if (saved_regs_mask
& (1 << i
))
27561 if (!simple_return
&& saved_regs_mask
)
27563 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27564 functions or adapt code to handle according to ACLE. This path should
27565 not be reachable for cmse_nonsecure_entry functions though we prefer
27566 to assert it for now to ensure that future code changes do not silently
27567 change this behavior. */
27568 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27569 if (arm_current_function_pac_enabled_p ())
27571 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
27572 arm_emit_multi_reg_pop (saved_regs_mask
);
27573 emit_insn (gen_aut_nop ());
27574 emit_jump_insn (simple_return_rtx
);
27576 else if (num_regs
== 1)
27578 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27579 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27580 rtx addr
= gen_rtx_MEM (SImode
,
27581 gen_rtx_POST_INC (SImode
,
27582 stack_pointer_rtx
));
27583 set_mem_alias_set (addr
, get_frame_alias_set ());
27584 XVECEXP (par
, 0, 0) = ret_rtx
;
27585 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
27586 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27587 emit_jump_insn (par
);
27591 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27592 saved_regs_mask
|= (1 << PC_REGNUM
);
27593 arm_emit_multi_reg_pop (saved_regs_mask
);
27598 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27599 cmse_nonsecure_entry_clear_before_return ();
27600 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   thumb1_expand_epilogue: emits the RTL epilogue for a Thumb-1 function.
   Skips naked functions; restores SP from the frame pointer when one was
   used; releases the local frame (large adjustments go through r3, which
   the comment below notes is always free here); emits a USE of SP so the
   adjustment is not deleted; emits clobbers/uses so dataflow sees the
   registers the pop will restore; and, for CMSE entry functions, clears
   caller-saved registers that are not used for the return value.  */
27605 thumb1_expand_epilogue (void)
27607 HOST_WIDE_INT amount
;
27608 arm_stack_offsets
*offsets
;
27611 /* Naked functions don't have prologues. */
27612 if (IS_NAKED (arm_current_func_type ()))
27615 offsets
= arm_get_frame_offsets ();
27616 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27618 if (frame_pointer_needed
)
27620 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27621 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27623 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27625 gcc_assert (amount
>= 0);
27628 emit_insn (gen_blockage ());
27631 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27632 GEN_INT (amount
)));
27635 /* r3 is always free in the epilogue. */
27636 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27638 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27639 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27643 /* Emit a USE (stack_pointer_rtx), so that
27644 the stack adjustment will not be deleted. */
27645 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27647 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27648 emit_insn (gen_blockage ());
27650 /* Emit a clobber for each insn that will be restored in the epilogue,
27651 so that flow2 will get register lifetimes correct. */
27652 for (regno
= 0; regno
< 13; regno
++)
27653 if (reg_needs_saving_p (regno
))
27654 emit_clobber (gen_rtx_REG (SImode
, regno
));
27656 if (! df_regs_ever_live_p (LR_REGNUM
))
27657 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27659 /* Clear all caller-saved regs that are not used to return. */
27660 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27661 cmse_nonsecure_entry_clear_before_return ();
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines -- e.g. the really_return condition head before fused
   line 27771 is missing) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   arm_expand_epilogue_apcs_frame: epilogue expansion for the ARM-mode
   APCS frame layout (asserts TARGET_APCS_FRAME && frame_pointer_needed
   && TARGET_ARM).  Walks the saved-register mask to locate the VFP save
   area relative to the hard frame pointer, pops VFP registers in the
   same groupings the prologue used (vldm needs consecutive regs),
   restores iWMMXt registers with REG_CFA_RESTORE notes, swaps IP for SP
   in the mask so the old stack pointer is reloaded directly into SP,
   optionally folds LR into PC for a combined return, unwinds SP to just
   below the saved registers, pops the core registers, restores IP for
   interrupt handlers, applies the EH stack adjustment, and finally
   emits the return (restoring the pre-realignment SP from r0 for
   IS_STACKALIGN functions).  */
27664 /* Epilogue code for APCS frame. */
27666 arm_expand_epilogue_apcs_frame (bool really_return
)
27668 unsigned long func_type
;
27669 unsigned long saved_regs_mask
;
27672 int floats_from_frame
= 0;
27673 arm_stack_offsets
*offsets
;
27675 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27676 func_type
= arm_current_func_type ();
27678 /* Get frame offsets for ARM. */
27679 offsets
= arm_get_frame_offsets ();
27680 saved_regs_mask
= offsets
->saved_regs_mask
;
27682 /* Find the offset of the floating-point save area in the frame. */
27684 = (offsets
->saved_args
27685 + arm_compute_static_chain_stack_bytes ()
27688 /* Compute how many core registers saved and how far away the floats are. */
27689 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27690 if (saved_regs_mask
& (1 << i
))
27693 floats_from_frame
+= 4;
27696 if (TARGET_VFP_BASE
)
27699 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27701 /* The offset is from IP_REGNUM. */
27702 int saved_size
= arm_get_vfp_saved_size ();
27703 if (saved_size
> 0)
27706 floats_from_frame
+= saved_size
;
27707 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27708 hard_frame_pointer_rtx
,
27709 GEN_INT (-floats_from_frame
)));
27710 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27711 ip_rtx
, hard_frame_pointer_rtx
);
27714 /* Generate VFP register multi-pop. */
27715 start_reg
= FIRST_VFP_REGNUM
;
27717 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27718 /* Look for a case where a reg does not need restoring. */
27719 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27721 if (start_reg
!= i
)
27722 arm_emit_vfp_multi_reg_pop (start_reg
,
27723 (i
- start_reg
) / 2,
27724 gen_rtx_REG (SImode
,
27729 /* Restore the remaining regs that we have discovered (or possibly
27730 even all of them, if the conditional in the for loop never
27732 if (start_reg
!= i
)
27733 arm_emit_vfp_multi_reg_pop (start_reg
,
27734 (i
- start_reg
) / 2,
27735 gen_rtx_REG (SImode
, IP_REGNUM
));
27740 /* The frame pointer is guaranteed to be non-double-word aligned, as
27741 it is set to double-word-aligned old_stack_pointer - 4. */
27743 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27745 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27746 if (reg_needs_saving_p (i
))
27748 rtx addr
= gen_frame_mem (V2SImode
,
27749 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27751 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27752 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27753 gen_rtx_REG (V2SImode
, i
),
27759 /* saved_regs_mask should contain IP which contains old stack pointer
27760 at the time of activation creation. Since SP and IP are adjacent registers,
27761 we can restore the value directly into SP. */
27762 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27763 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27764 saved_regs_mask
|= (1 << SP_REGNUM
);
27766 /* There are two registers left in saved_regs_mask - LR and PC. We
27767 only need to restore LR (the return address), but to
27768 save time we can load it directly into PC, unless we need a
27769 special function exit sequence, or we are not really returning. */
27771 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27772 && !crtl
->calls_eh_return
)
27773 /* Delete LR from the register mask, so that LR on
27774 the stack is loaded into the PC in the register mask. */
27775 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27777 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27779 num_regs
= bit_count (saved_regs_mask
);
27780 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27783 emit_insn (gen_blockage ());
27784 /* Unwind the stack to just below the saved registers. */
27785 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27786 hard_frame_pointer_rtx
,
27787 GEN_INT (- 4 * num_regs
)));
27789 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27790 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27793 arm_emit_multi_reg_pop (saved_regs_mask
);
27795 if (IS_INTERRUPT (func_type
))
27797 /* Interrupt handlers will have pushed the
27798 IP onto the stack, so restore it now. */
27800 rtx addr
= gen_rtx_MEM (SImode
,
27801 gen_rtx_POST_INC (SImode
,
27802 stack_pointer_rtx
));
27803 set_mem_alias_set (addr
, get_frame_alias_set ());
27804 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27805 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27806 gen_rtx_REG (SImode
, IP_REGNUM
),
27810 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27813 if (crtl
->calls_eh_return
)
27814 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27816 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27818 if (IS_STACKALIGN (func_type
))
27819 /* Restore the original stack pointer. Before prologue, the stack was
27820 realigned and the original stack pointer saved in r0. For details,
27821 see comment in arm_expand_prologue. */
27822 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
27824 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   arm_expand_epilogue: the general ARM/Thumb-2 epilogue expander
   (REALLY_RETURN is false for sibcalls).  Visible flow: naked /
   volatile-abort functions just get a simple_return; APCS frames are
   delegated to arm_expand_epilogue_apcs_frame; otherwise the local
   frame is released (via the frame pointer when one exists, with
   ARM-mode and Thumb-2 layouts handled separately, each followed by a
   blockage and a forced USE of SP so the adjustment survives), VFP
   registers are popped in prologue-matching groups, iWMMXt registers
   are popped with REG_CFA_RESTORE notes, the core registers are popped
   (folding LR into PC for a direct return when the conditions around
   fused lines 27993-28005 permit, or via ldrd-based pops when the tune
   prefers them), pretend args and the static chain are deallocated with
   CFA-restore notes for anonymous args, CMSE entry functions clear
   leaking registers and restore FPCXTNS, a PAC authentication (aut_nop)
   is emitted when enabled, and finally the EH stack adjustment /
   stack-realignment undo / simple_return are emitted for real
   returns.  */
27827 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27828 function is not a sibcall. */
27830 arm_expand_epilogue (bool really_return
)
27832 unsigned long func_type
;
27833 unsigned long saved_regs_mask
;
27837 arm_stack_offsets
*offsets
;
27839 func_type
= arm_current_func_type ();
27841 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27842 let output_return_instruction take care of instruction emission if any. */
27843 if (IS_NAKED (func_type
)
27844 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27847 emit_jump_insn (simple_return_rtx
);
27851 /* If we are throwing an exception, then we really must be doing a
27852 return, so we can't tail-call. */
27853 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27855 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27857 arm_expand_epilogue_apcs_frame (really_return
);
27861 /* Get frame offsets for ARM. */
27862 offsets
= arm_get_frame_offsets ();
27863 saved_regs_mask
= offsets
->saved_regs_mask
;
27864 num_regs
= bit_count (saved_regs_mask
);
27866 if (frame_pointer_needed
)
27869 /* Restore stack pointer if necessary. */
27872 /* In ARM mode, frame pointer points to first saved register.
27873 Restore stack pointer to last saved register. */
27874 amount
= offsets
->frame
- offsets
->saved_regs
;
27876 /* Force out any pending memory operations that reference stacked data
27877 before stack de-allocation occurs. */
27878 emit_insn (gen_blockage ());
27879 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27880 hard_frame_pointer_rtx
,
27881 GEN_INT (amount
)));
27882 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27884 hard_frame_pointer_rtx
);
27886 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27888 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27892 /* In Thumb-2 mode, the frame pointer points to the last saved
27894 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27897 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27898 hard_frame_pointer_rtx
,
27899 GEN_INT (amount
)));
27900 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27901 hard_frame_pointer_rtx
,
27902 hard_frame_pointer_rtx
);
27905 /* Force out any pending memory operations that reference stacked data
27906 before stack de-allocation occurs. */
27907 emit_insn (gen_blockage ());
27908 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27909 hard_frame_pointer_rtx
));
27910 arm_add_cfa_adjust_cfa_note (insn
, 0,
27912 hard_frame_pointer_rtx
);
27913 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27915 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27920 /* Pop off outgoing args and local frame to adjust stack pointer to
27921 last saved register. */
27922 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27926 /* Force out any pending memory operations that reference stacked data
27927 before stack de-allocation occurs. */
27928 emit_insn (gen_blockage ());
27929 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27931 GEN_INT (amount
)));
27932 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27933 stack_pointer_rtx
, stack_pointer_rtx
);
27934 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27936 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27940 if (TARGET_VFP_BASE
)
27942 /* Generate VFP register multi-pop. */
27943 int end_reg
= LAST_VFP_REGNUM
+ 1;
27945 /* Scan the registers in reverse order. We need to match
27946 any groupings made in the prologue and generate matching
27947 vldm operations. The need to match groups is because,
27948 unlike pop, vldm can only do consecutive regs. */
27949 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27950 /* Look for a case where a reg does not need restoring. */
27951 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27953 /* Restore the regs discovered so far (from reg+2 to
27955 if (end_reg
> i
+ 2)
27956 arm_emit_vfp_multi_reg_pop (i
+ 2,
27957 (end_reg
- (i
+ 2)) / 2,
27958 stack_pointer_rtx
);
27962 /* Restore the remaining regs that we have discovered (or possibly
27963 even all of them, if the conditional in the for loop never
27965 if (end_reg
> i
+ 2)
27966 arm_emit_vfp_multi_reg_pop (i
+ 2,
27967 (end_reg
- (i
+ 2)) / 2,
27968 stack_pointer_rtx
);
27972 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27973 if (reg_needs_saving_p (i
))
27976 rtx addr
= gen_rtx_MEM (V2SImode
,
27977 gen_rtx_POST_INC (SImode
,
27978 stack_pointer_rtx
));
27979 set_mem_alias_set (addr
, get_frame_alias_set ());
27980 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27981 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27982 gen_rtx_REG (V2SImode
, i
),
27984 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27985 stack_pointer_rtx
, stack_pointer_rtx
);
27988 if (saved_regs_mask
)
27991 bool return_in_pc
= false;
27993 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27994 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27995 && !IS_CMSE_ENTRY (func_type
)
27996 && !IS_STACKALIGN (func_type
)
27998 && crtl
->args
.pretend_args_size
== 0
27999 && saved_regs_mask
& (1 << LR_REGNUM
)
28000 && !crtl
->calls_eh_return
28001 && !arm_current_function_pac_enabled_p ())
28003 saved_regs_mask
&= ~(1 << LR_REGNUM
);
28004 saved_regs_mask
|= (1 << PC_REGNUM
);
28005 return_in_pc
= true;
28008 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
28010 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
28011 if (saved_regs_mask
& (1 << i
))
28013 rtx addr
= gen_rtx_MEM (SImode
,
28014 gen_rtx_POST_INC (SImode
,
28015 stack_pointer_rtx
));
28016 set_mem_alias_set (addr
, get_frame_alias_set ());
28018 if (i
== PC_REGNUM
)
28020 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
28021 XVECEXP (insn
, 0, 0) = ret_rtx
;
28022 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
28024 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
28025 insn
= emit_jump_insn (insn
);
28029 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
28031 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
28032 gen_rtx_REG (SImode
, i
),
28034 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
28036 stack_pointer_rtx
);
28043 && current_tune
->prefer_ldrd_strd
28044 && !optimize_function_for_size_p (cfun
))
28047 thumb2_emit_ldrd_pop (saved_regs_mask
);
28048 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
28049 arm_emit_ldrd_pop (saved_regs_mask
);
28051 arm_emit_multi_reg_pop (saved_regs_mask
);
28054 arm_emit_multi_reg_pop (saved_regs_mask
);
28062 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
28066 rtx dwarf
= NULL_RTX
;
28068 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28070 GEN_INT (amount
)));
28072 RTX_FRAME_RELATED_P (tmp
) = 1;
28074 if (cfun
->machine
->uses_anonymous_args
)
28076 /* Restore pretend args. Refer arm_expand_prologue on how to save
28077 pretend_args in stack. */
28078 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
28079 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
28080 for (j
= 0, i
= 0; j
< num_regs
; i
++)
28081 if (saved_regs_mask
& (1 << i
))
28083 rtx reg
= gen_rtx_REG (SImode
, i
);
28084 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
28087 REG_NOTES (tmp
) = dwarf
;
28089 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
28090 stack_pointer_rtx
, stack_pointer_rtx
);
28093 if (IS_CMSE_ENTRY (func_type
))
28095 /* CMSE_ENTRY always returns. */
28096 gcc_assert (really_return
);
28097 /* Clear all caller-saved regs that are not used to return. */
28098 cmse_nonsecure_entry_clear_before_return ();
28100 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28102 if (TARGET_HAVE_FPCXT_CMSE
)
28106 insn
= emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx
,
28107 GEN_INT (FPCXTNS_ENUM
)));
28108 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
28109 plus_constant (Pmode
, stack_pointer_rtx
, 4));
28110 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
28111 RTX_FRAME_RELATED_P (insn
) = 1;
28115 if (arm_current_function_pac_enabled_p ())
28116 emit_insn (gen_aut_nop ());
28118 if (!really_return
)
28121 if (crtl
->calls_eh_return
)
28122 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28124 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
28126 if (IS_STACKALIGN (func_type
))
28127 /* Restore the original stack pointer. Before prologue, the stack was
28128 realigned and the original stack pointer saved in r0. For details,
28129 see comment in arm_expand_prologue. */
28130 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
28132 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   thumb1_output_interwork: prints the assembly for the
   prologue_thumb1_interwork insn -- the ARM-mode entry stub of a Thumb
   function.  It emits `orr ip, pc, #1` / `bx ip` to switch to Thumb
   state, then a `.code 16` / `.thumb_func` STUB_NAME label
   (".real_start_of<name>") that Thumb-mode callers in the same file can
   branch to directly; the comment below notes the label name must match
   gas/config/tc-arm.c.  */
28135 /* Implementation of insn prologue_thumb1_interwork. This is the first
28136 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28139 thumb1_output_interwork (void)
28142 FILE *f
= asm_out_file
;
28144 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
28145 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
28147 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
28149 /* Generate code sequence to switch us into Thumb mode. */
28150 /* The .code 32 directive has already been emitted by
28151 ASM_DECLARE_FUNCTION_NAME. */
28152 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
28153 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
28155 /* Generate a label, so that the debugger will notice the
28156 change in instruction sets. This label is also used by
28157 the assembler to bypass the ARM code when this function
28158 is called from a Thumb encoded function elsewhere in the
28159 same file. Hence the definition of STUB_NAME here must
28160 agree with the definition in gas/config/tc-arm.c. */
28162 #define STUB_NAME ".real_start_of"
28164 fprintf (f
, "\t.code\t16\n");
28166 if (arm_dllexport_name_p (name
))
28167 name
= arm_strip_name_encoding (name
);
28169 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
28170 fprintf (f
, "\t.thumb_func\n");
28171 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
28176 /* Handle the case of a double word load into a low register from
28177 a computed memory address. The computed address may involve a
28178 register which is overwritten by the load. */
28180 thumb_load_double_from_address (rtx
*operands
)
28188 gcc_assert (REG_P (operands
[0]));
28189 gcc_assert (MEM_P (operands
[1]));
28191 /* Get the memory address. */
28192 addr
= XEXP (operands
[1], 0);
28194 /* Work out how the memory address is computed. */
28195 switch (GET_CODE (addr
))
28198 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28200 if (REGNO (operands
[0]) == REGNO (addr
))
28202 output_asm_insn ("ldr\t%H0, %2", operands
);
28203 output_asm_insn ("ldr\t%0, %1", operands
);
28207 output_asm_insn ("ldr\t%0, %1", operands
);
28208 output_asm_insn ("ldr\t%H0, %2", operands
);
28213 /* Compute <address> + 4 for the high order load. */
28214 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28216 output_asm_insn ("ldr\t%0, %1", operands
);
28217 output_asm_insn ("ldr\t%H0, %2", operands
);
28221 arg1
= XEXP (addr
, 0);
28222 arg2
= XEXP (addr
, 1);
28224 if (CONSTANT_P (arg1
))
28225 base
= arg2
, offset
= arg1
;
28227 base
= arg1
, offset
= arg2
;
28229 gcc_assert (REG_P (base
));
28231 /* Catch the case of <address> = <reg> + <reg> */
28232 if (REG_P (offset
))
28234 int reg_offset
= REGNO (offset
);
28235 int reg_base
= REGNO (base
);
28236 int reg_dest
= REGNO (operands
[0]);
28238 /* Add the base and offset registers together into the
28239 higher destination register. */
28240 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28241 reg_dest
+ 1, reg_base
, reg_offset
);
28243 /* Load the lower destination register from the address in
28244 the higher destination register. */
28245 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28246 reg_dest
, reg_dest
+ 1);
28248 /* Load the higher destination register from its own address
28250 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28251 reg_dest
+ 1, reg_dest
+ 1);
28255 /* Compute <address> + 4 for the high order load. */
28256 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28258 /* If the computed address is held in the low order register
28259 then load the high order register first, otherwise always
28260 load the low order register first. */
28261 if (REGNO (operands
[0]) == REGNO (base
))
28263 output_asm_insn ("ldr\t%H0, %2", operands
);
28264 output_asm_insn ("ldr\t%0, %1", operands
);
28268 output_asm_insn ("ldr\t%0, %1", operands
);
28269 output_asm_insn ("ldr\t%H0, %2", operands
);
28275 /* With no registers to worry about we can just load the value
28277 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28279 output_asm_insn ("ldr\t%H0, %2", operands
);
28280 output_asm_insn ("ldr\t%0, %1", operands
);
28284 gcc_unreachable ();
28291 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28296 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28297 std::swap (operands
[4], operands
[5]);
28299 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28300 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28304 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28305 std::swap (operands
[4], operands
[5]);
28306 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28307 std::swap (operands
[5], operands
[6]);
28308 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28309 std::swap (operands
[4], operands
[5]);
28311 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28312 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28316 gcc_unreachable ();
28322 /* Output a call-via instruction for thumb state. */
28324 thumb_call_via_reg (rtx reg
)
28326 int regno
= REGNO (reg
);
28329 gcc_assert (regno
< LR_REGNUM
);
28331 /* If we are in the normal text section we can use a single instance
28332 per compilation unit. If we are doing function sections, then we need
28333 an entry per section, since we can't rely on reachability. */
28334 if (in_section
== text_section
)
28336 thumb_call_reg_needed
= 1;
28338 if (thumb_call_via_label
[regno
] == NULL
)
28339 thumb_call_via_label
[regno
] = gen_label_rtx ();
28340 labelp
= thumb_call_via_label
+ regno
;
28344 if (cfun
->machine
->call_via
[regno
] == NULL
)
28345 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28346 labelp
= cfun
->machine
->call_via
+ regno
;
28349 output_asm_insn ("bl\t%a0", labelp
);
28353 /* Routines for generating rtl. */
28355 thumb_expand_cpymemqi (rtx
*operands
)
28357 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28358 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28359 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28360 HOST_WIDE_INT offset
= 0;
28364 emit_insn (gen_cpymem12b (out
, in
, out
, in
));
28370 emit_insn (gen_cpymem8b (out
, in
, out
, in
));
28376 rtx reg
= gen_reg_rtx (SImode
);
28377 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28378 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28385 rtx reg
= gen_reg_rtx (HImode
);
28386 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28387 plus_constant (Pmode
, in
,
28389 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28398 rtx reg
= gen_reg_rtx (QImode
);
28399 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28400 plus_constant (Pmode
, in
,
28402 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28409 thumb_reload_out_hi (rtx
*operands
)
28411 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28414 /* Return the length of a function name prefix
28415 that starts with the character 'c'. */
28417 arm_get_strip_length (int c
)
28421 ARM_NAME_ENCODING_LENGTHS
28426 /* Return a pointer to a function's name with any
28427 and all prefix encodings stripped from it. */
28429 arm_strip_name_encoding (const char *name
)
28433 while ((skip
= arm_get_strip_length (* name
)))
28439 /* If there is a '*' anywhere in the name's prefix, then
28440 emit the stripped name verbatim, otherwise prepend an
28441 underscore if leading underscores are being used. */
28443 arm_asm_output_labelref (FILE *stream
, const char *name
)
28448 while ((skip
= arm_get_strip_length (* name
)))
28450 verbatim
|= (*name
== '*');
28455 fputs (name
, stream
);
28457 asm_fprintf (stream
, "%U%s", name
);
28460 /* This function is used to emit an EABI tag and its associated value.
28461 We emit the numerical value of the tag in case the assembler does not
28462 support textual tags. (Eg gas prior to 2.20). If requested we include
28463 the tag name in a comment so that anyone reading the assembler output
28464 will know which tag is being set.
28466 This function is not static because arm-c.cc needs it too. */
28469 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28471 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28472 if (flag_verbose_asm
|| flag_debug_asm
)
28473 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28474 asm_fprintf (asm_out_file
, "\n");
28477 /* This function is used to print CPU tuning information as comment
28478 in assembler file. Pointers are not printed for now. */
28481 arm_print_tune_info (void)
28483 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
28484 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
28485 current_tune
->constant_limit
);
28486 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28487 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
28488 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28489 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
28490 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28491 "prefetch.l1_cache_size:\t%d\n",
28492 current_tune
->prefetch
.l1_cache_size
);
28493 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28494 "prefetch.l1_cache_line_size:\t%d\n",
28495 current_tune
->prefetch
.l1_cache_line_size
);
28496 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28497 "prefer_constant_pool:\t%d\n",
28498 (int) current_tune
->prefer_constant_pool
);
28499 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28500 "branch_cost:\t(s:speed, p:predictable)\n");
28501 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
28502 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
28503 current_tune
->branch_cost (false, false));
28504 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
28505 current_tune
->branch_cost (false, true));
28506 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
28507 current_tune
->branch_cost (true, false));
28508 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
28509 current_tune
->branch_cost (true, true));
28510 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28511 "prefer_ldrd_strd:\t%d\n",
28512 (int) current_tune
->prefer_ldrd_strd
);
28513 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28514 "logical_op_non_short_circuit:\t[%d,%d]\n",
28515 (int) current_tune
->logical_op_non_short_circuit_thumb
,
28516 (int) current_tune
->logical_op_non_short_circuit_arm
);
28517 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28518 "disparage_flag_setting_t16_encodings:\t%d\n",
28519 (int) current_tune
->disparage_flag_setting_t16_encodings
);
28520 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28521 "string_ops_prefer_neon:\t%d\n",
28522 (int) current_tune
->string_ops_prefer_neon
);
28523 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28524 "max_insns_inline_memset:\t%d\n",
28525 current_tune
->max_insns_inline_memset
);
28526 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
28527 current_tune
->fusible_ops
);
28528 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
28529 (int) current_tune
->sched_autopref
);
28532 /* The last set of target options used to emit .arch directives, etc. This
28533 could be a function-local static if it were not required to expose it as a
28534 root to the garbage collector. */
28535 static GTY(()) cl_target_option
*last_asm_targ_options
= NULL
;
28537 /* Print .arch and .arch_extension directives corresponding to the
28538 current architecture configuration. */
28540 arm_print_asm_arch_directives (FILE *stream
, cl_target_option
*targ_options
)
28542 arm_build_target build_target
;
28543 /* If the target options haven't changed since the last time we were called
28544 there is nothing to do. This should be sufficient to suppress the
28545 majority of redundant work. */
28546 if (last_asm_targ_options
== targ_options
)
28549 last_asm_targ_options
= targ_options
;
28551 build_target
.isa
= sbitmap_alloc (isa_num_bits
);
28552 arm_configure_build_target (&build_target
, targ_options
, false);
28554 if (build_target
.core_name
28555 && !bitmap_bit_p (build_target
.isa
, isa_bit_quirk_no_asmcpu
))
28557 const char* truncated_name
28558 = arm_rewrite_selected_cpu (build_target
.core_name
);
28559 asm_fprintf (stream
, "\t.cpu %s\n", truncated_name
);
28562 const arch_option
*arch
28563 = arm_parse_arch_option_name (all_architectures
, "-march",
28564 build_target
.arch_name
);
28565 auto_sbitmap
opt_bits (isa_num_bits
);
28569 if (strcmp (build_target
.arch_name
, "armv7ve") == 0)
28571 /* Keep backward compatability for assemblers which don't support
28572 armv7ve. Fortunately, none of the following extensions are reset
28573 by a .fpu directive. */
28574 asm_fprintf (stream
, "\t.arch armv7-a\n");
28575 asm_fprintf (stream
, "\t.arch_extension virt\n");
28576 asm_fprintf (stream
, "\t.arch_extension idiv\n");
28577 asm_fprintf (stream
, "\t.arch_extension sec\n");
28578 asm_fprintf (stream
, "\t.arch_extension mp\n");
28581 asm_fprintf (stream
, "\t.arch %s\n", build_target
.arch_name
);
28583 /* The .fpu directive will reset any architecture extensions from the
28584 assembler that relate to the fp/vector extensions. So put this out before
28585 any .arch_extension directives. */
28586 const char *fpu_name
= (TARGET_SOFT_FLOAT
28588 : arm_identify_fpu_from_isa (build_target
.isa
));
28589 asm_fprintf (stream
, "\t.fpu %s\n", fpu_name
);
28591 if (!arch
->common
.extensions
)
28594 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
28600 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
28602 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28603 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28604 floating point instructions is disabled. So the following check
28605 restricts the printing of ".arch_extension mve" and
28606 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28607 this special behaviour because the feature bit "mve" and
28608 "mve_float" are not part of "fpu bits", so they are not cleared
28609 when -mfloat-abi=soft (i.e nofp) but the marco TARGET_HAVE_MVE and
28610 TARGET_HAVE_MVE_FLOAT are disabled. */
28611 if ((bitmap_bit_p (opt_bits
, isa_bit_mve
) && !TARGET_HAVE_MVE
)
28612 || (bitmap_bit_p (opt_bits
, isa_bit_mve_float
)
28613 && !TARGET_HAVE_MVE_FLOAT
))
28616 /* If every feature bit of this option is set in the target ISA
28617 specification, print out the option name. However, don't print
28618 anything if all the bits are part of the FPU specification. */
28619 if (bitmap_subset_p (opt_bits
, build_target
.isa
)
28620 && !bitmap_subset_p (opt_bits
, isa_all_fpubits_internal
))
28621 asm_fprintf (stream
, "\t.arch_extension %s\n", opt
->name
);
28627 arm_file_start (void)
28630 bool pac
= (aarch_ra_sign_scope
!= AARCH_FUNCTION_NONE
);
28631 bool bti
= (aarch_enable_bti
== 1);
28633 arm_print_asm_arch_directives
28634 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28638 /* If we have a named cpu, but we the assembler does not support that
28639 name via .cpu, put out a cpu name attribute; but don't do this if the
28640 name starts with the fictitious prefix, 'generic'. */
28641 if (arm_active_target
.core_name
28642 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
)
28643 && !startswith (arm_active_target
.core_name
, "generic"))
28645 const char* truncated_name
28646 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
28647 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
))
28648 asm_fprintf (asm_out_file
, "\t.eabi_attribute 5, \"%s\"\n",
28652 if (print_tune_info
)
28653 arm_print_tune_info ();
28655 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
28656 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28658 if (TARGET_HARD_FLOAT_ABI
)
28659 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28661 /* Some of these attributes only apply when the corresponding features
28662 are used. However we don't have any easy way of figuring this out.
28663 Conservatively record the setting that would have been used. */
28665 if (flag_rounding_math
)
28666 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28668 if (!flag_unsafe_math_optimizations
)
28670 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28671 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28673 if (flag_signaling_nans
)
28674 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28676 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28677 flag_finite_math_only
? 1 : 3);
28679 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28680 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28681 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28682 flag_short_enums
? 1 : 2);
28684 /* Tag_ABI_optimization_goals. */
28687 else if (optimize
>= 2)
28693 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28695 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28698 if (arm_fp16_format
)
28699 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28700 (int) arm_fp16_format
);
28702 if (TARGET_HAVE_PACBTI
)
28704 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28705 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28707 else if (pac
|| bti
)
28709 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28710 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28714 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28716 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28718 if (arm_lang_output_object_attributes_hook
)
28719 arm_lang_output_object_attributes_hook();
28722 default_file_start ();
28726 arm_file_end (void)
28730 /* Just in case the last function output in the assembler had non-default
28731 architecture directives, we force the assembler state back to the default
28732 set, so that any 'calculated' build attributes are based on the default
28733 options rather than the special options for that function. */
28734 arm_print_asm_arch_directives
28735 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28737 if (NEED_INDICATE_EXEC_STACK
)
28738 /* Add .note.GNU-stack. */
28739 file_end_indicate_exec_stack ();
28741 if (! thumb_call_reg_needed
)
28744 switch_to_section (text_section
);
28745 asm_fprintf (asm_out_file
, "\t.code 16\n");
28746 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28748 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28750 rtx label
= thumb_call_via_label
[regno
];
28754 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28755 CODE_LABEL_NUMBER (label
));
28756 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28762 /* Symbols in the text segment can be accessed without indirecting via the
28763 constant pool; it may take an extra binary operation, but this is still
28764 faster than indirecting via memory. Don't do this when not optimizing,
28765 since we won't be calculating al of the offsets necessary to do this
28769 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28771 if (optimize
> 0 && TREE_CONSTANT (decl
))
28772 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28774 default_encode_section_info (decl
, rtl
, first
);
28776 #endif /* !ARM_PE */
28779 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28781 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28782 && !strcmp (prefix
, "L"))
28784 arm_ccfsm_state
= 0;
28785 arm_target_insn
= NULL
;
28787 default_internal_label (stream
, prefix
, labelno
);
28790 /* Define classes to generate code as RTL or output asm to a file.
28791 Using templates then allows to use the same code to output code
28792 sequences in the two formats. */
28793 class thumb1_const_rtl
28796 thumb1_const_rtl (rtx dst
) : dst (dst
) {}
28798 void mov (HOST_WIDE_INT val
)
28800 emit_set_insn (dst
, GEN_INT (val
));
28803 void add (HOST_WIDE_INT val
)
28805 emit_set_insn (dst
, gen_rtx_PLUS (SImode
, dst
, GEN_INT (val
)));
28808 void ashift (HOST_WIDE_INT shift
)
28810 emit_set_insn (dst
, gen_rtx_ASHIFT (SImode
, dst
, GEN_INT (shift
)));
28815 emit_set_insn (dst
, gen_rtx_NEG (SImode
, dst
));
28822 class thumb1_const_print
28825 thumb1_const_print (FILE *f
, int regno
)
28828 dst_regname
= reg_names
[regno
];
28831 void mov (HOST_WIDE_INT val
)
28833 asm_fprintf (t_file
, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28837 void add (HOST_WIDE_INT val
)
28839 asm_fprintf (t_file
, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28843 void ashift (HOST_WIDE_INT shift
)
28845 asm_fprintf (t_file
, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28846 dst_regname
, shift
);
28851 asm_fprintf (t_file
, "\trsbs\t%s, #0\n", dst_regname
);
28856 const char *dst_regname
;
28859 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28860 Avoid generating useless code when one of the bytes is zero. */
28863 thumb1_gen_const_int_1 (T dst
, HOST_WIDE_INT op1
)
28865 bool mov_done_p
= false;
28866 unsigned HOST_WIDE_INT val
= op1
;
28870 gcc_assert (op1
== trunc_int_for_mode (op1
, SImode
));
28878 /* For negative numbers with the first nine bits set, build the
28879 opposite of OP1, then negate it, it's generally shorter and not
28881 if ((val
& 0xFF800000) == 0xFF800000)
28883 thumb1_gen_const_int_1 (dst
, -op1
);
28888 /* In the general case, we need 7 instructions to build
28889 a 32 bits constant (1 movs, 3 lsls, 3 adds). We can
28890 do better if VAL is small enough, or
28891 right-shiftable by a suitable amount. If the
28892 right-shift enables to encode at least one less byte,
28893 it's worth it: we save a adds and a lsls at the
28894 expense of a final lsls. */
28895 int final_shift
= number_of_first_bit_set (val
);
28897 int leading_zeroes
= clz_hwi (val
);
28898 int number_of_bytes_needed
28899 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
)
28900 / BITS_PER_UNIT
) + 1;
28901 int number_of_bytes_needed2
28902 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
- final_shift
)
28903 / BITS_PER_UNIT
) + 1;
28905 if (number_of_bytes_needed2
< number_of_bytes_needed
)
28906 val
>>= final_shift
;
28910 /* If we are in a very small range, we can use either a single movs
28916 unsigned HOST_WIDE_INT high
= val
- 255;
28924 if (final_shift
> 0)
28925 dst
.ashift (final_shift
);
28929 /* General case, emit upper 3 bytes as needed. */
28930 for (i
= 0; i
< 3; i
++)
28932 unsigned HOST_WIDE_INT byte
= (val
>> (8 * (3 - i
))) & 0xff;
28936 /* We are about to emit new bits, stop accumulating a
28937 shift amount, and left-shift only if we have already
28938 emitted some upper bits. */
28941 dst
.ashift (shift
);
28947 /* Stop accumulating shift amount since we've just
28948 emitted some bits. */
28958 /* Emit lower byte. */
28960 dst
.mov (val
& 0xff);
28963 dst
.ashift (shift
);
28965 dst
.add (val
& 0xff);
28968 if (final_shift
> 0)
28969 dst
.ashift (final_shift
);
28973 /* Proxies for thumb1.md, since the thumb1_const_print and
28974 thumb1_const_rtl classes are not exported. */
28976 thumb1_gen_const_int_rtl (rtx dst
, HOST_WIDE_INT op1
)
28978 thumb1_const_rtl
t (dst
);
28979 thumb1_gen_const_int_1 (t
, op1
);
28983 thumb1_gen_const_int_print (rtx dst
, HOST_WIDE_INT op1
)
28985 thumb1_const_print
t (asm_out_file
, REGNO (dst
));
28986 thumb1_gen_const_int_1 (t
, op1
);
28989 /* Output code to add DELTA to the first argument, and then jump
28990 to FUNCTION. Used for C++ multiple inheritance. */
28993 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
28994 HOST_WIDE_INT
, tree function
)
28996 static int thunk_label
= 0;
28999 int mi_delta
= delta
;
29000 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
29002 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
29005 mi_delta
= - mi_delta
;
29007 final_start_function (emit_barrier (), file
, 1);
29011 int labelno
= thunk_label
++;
29012 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
29013 /* Thunks are entered in arm mode when available. */
29014 if (TARGET_THUMB1_ONLY
)
29016 /* push r3 so we can use it as a temporary. */
29017 /* TODO: Omit this save if r3 is not used. */
29018 fputs ("\tpush {r3}\n", file
);
29020 /* With -mpure-code, we cannot load the address from the
29021 constant pool: we build it explicitly. */
29022 if (target_pure_code
)
29024 fputs ("\tmovs\tr3, #:upper8_15:#", file
);
29025 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29026 fputc ('\n', file
);
29027 fputs ("\tlsls r3, #8\n", file
);
29028 fputs ("\tadds\tr3, #:upper0_7:#", file
);
29029 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29030 fputc ('\n', file
);
29031 fputs ("\tlsls r3, #8\n", file
);
29032 fputs ("\tadds\tr3, #:lower8_15:#", file
);
29033 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29034 fputc ('\n', file
);
29035 fputs ("\tlsls r3, #8\n", file
);
29036 fputs ("\tadds\tr3, #:lower0_7:#", file
);
29037 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29038 fputc ('\n', file
);
29041 fputs ("\tldr\tr3, ", file
);
29045 fputs ("\tldr\tr12, ", file
);
29048 if (!target_pure_code
)
29050 assemble_name (file
, label
);
29051 fputc ('\n', file
);
29056 /* If we are generating PIC, the ldr instruction below loads
29057 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29058 the address of the add + 8, so we have:
29060 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29063 Note that we have "+ 1" because some versions of GNU ld
29064 don't set the low bit of the result for R_ARM_REL32
29065 relocations against thumb function symbols.
29066 On ARMv6M this is +4, not +8. */
29067 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
29068 assemble_name (file
, labelpc
);
29069 fputs (":\n", file
);
29070 if (TARGET_THUMB1_ONLY
)
29072 /* This is 2 insns after the start of the thunk, so we know it
29073 is 4-byte aligned. */
29074 fputs ("\tadd\tr3, pc, r3\n", file
);
29075 fputs ("\tmov r12, r3\n", file
);
29078 fputs ("\tadd\tr12, pc, r12\n", file
);
29080 else if (TARGET_THUMB1_ONLY
)
29081 fputs ("\tmov r12, r3\n", file
);
29083 if (TARGET_THUMB1_ONLY
)
29085 if (mi_delta
> 255)
29087 /* With -mpure-code, we cannot load MI_DELTA from the
29088 constant pool: we build it explicitly. */
29089 if (target_pure_code
)
29091 thumb1_const_print
r3 (file
, 3);
29092 thumb1_gen_const_int_1 (r3
, mi_delta
);
29096 fputs ("\tldr\tr3, ", file
);
29097 assemble_name (file
, label
);
29098 fputs ("+4\n", file
);
29100 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
29101 mi_op
, this_regno
, this_regno
);
29103 else if (mi_delta
!= 0)
29105 /* Thumb1 unified syntax requires s suffix in instruction name when
29106 one of the operands is immediate. */
29107 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
29108 mi_op
, this_regno
, this_regno
,
29114 /* TODO: Use movw/movt for large constants when available. */
29115 while (mi_delta
!= 0)
29117 if ((mi_delta
& (3 << shift
)) == 0)
29121 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
29122 mi_op
, this_regno
, this_regno
,
29123 mi_delta
& (0xff << shift
));
29124 mi_delta
&= ~(0xff << shift
);
29131 if (TARGET_THUMB1_ONLY
)
29132 fputs ("\tpop\t{r3}\n", file
);
29134 fprintf (file
, "\tbx\tr12\n");
29136 /* With -mpure-code, we don't need to emit literals for the
29137 function address and delta since we emitted code to build
29139 if (!target_pure_code
)
29141 ASM_OUTPUT_ALIGN (file
, 2);
29142 assemble_name (file
, label
);
29143 fputs (":\n", file
);
29146 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29147 rtx tem
= XEXP (DECL_RTL (function
), 0);
29148 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29149 pipeline offset is four rather than eight. Adjust the offset
29151 tem
= plus_constant (GET_MODE (tem
), tem
,
29152 TARGET_THUMB1_ONLY
? -3 : -7);
29153 tem
= gen_rtx_MINUS (GET_MODE (tem
),
29155 gen_rtx_SYMBOL_REF (Pmode
,
29156 ggc_strdup (labelpc
)));
29157 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
29160 /* Output ".word .LTHUNKn". */
29161 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
29163 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
29164 assemble_integer (GEN_INT (mi_delta
), 4, BITS_PER_WORD
, 1);
29169 fputs ("\tb\t", file
);
29170 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29171 if (NEED_PLT_RELOC
)
29172 fputs ("(PLT)", file
);
29173 fputc ('\n', file
);
29176 final_end_function ();
29179 /* MI thunk handling for TARGET_32BIT. */
29182 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
29183 HOST_WIDE_INT vcall_offset
, tree function
)
29185 const bool long_call_p
= arm_is_long_call_p (function
);
29187 /* On ARM, this_regno is R0 or R1 depending on
29188 whether the function returns an aggregate or not.
29190 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
29192 ? R1_REGNUM
: R0_REGNUM
);
29194 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
29195 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
29196 reload_completed
= 1;
29197 emit_note (NOTE_INSN_PROLOGUE_END
);
29199 /* Add DELTA to THIS_RTX. */
29201 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
29202 delta
, this_rtx
, this_rtx
, false);
29204 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29205 if (vcall_offset
!= 0)
29207 /* Load *THIS_RTX. */
29208 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
29209 /* Compute *THIS_RTX + VCALL_OFFSET. */
29210 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
29212 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29213 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
29214 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
29217 /* Generate a tail call to the target function. */
29218 if (!TREE_USED (function
))
29220 assemble_external (function
);
29221 TREE_USED (function
) = 1;
29223 rtx funexp
= XEXP (DECL_RTL (function
), 0);
29226 emit_move_insn (temp
, funexp
);
29229 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
29230 rtx_insn
*insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
29231 SIBLING_CALL_P (insn
) = 1;
29234 /* Indirect calls require a bit of fixup in PIC mode. */
29237 split_all_insns_noflow ();
29241 insn
= get_insns ();
29242 shorten_branches (insn
);
29243 final_start_function (insn
, file
, 1);
29244 final (insn
, file
, 1);
29245 final_end_function ();
29247 /* Stop pretending this is a post-reload pass. */
29248 reload_completed
= 0;
29251 /* Output code to add DELTA to the first argument, and then jump
29252 to FUNCTION. Used for C++ multiple inheritance. */
29255 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
29256 HOST_WIDE_INT vcall_offset
, tree function
)
29258 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
29260 assemble_start_function (thunk
, fnname
);
29262 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29264 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29265 assemble_end_function (thunk
, fnname
);
29269 arm_emit_vector_const (FILE *file
, rtx x
)
29272 const char * pattern
;
29274 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
29276 switch (GET_MODE (x
))
29278 case E_V2SImode
: pattern
= "%08x"; break;
29279 case E_V4HImode
: pattern
= "%04x"; break;
29280 case E_V8QImode
: pattern
= "%02x"; break;
29281 default: gcc_unreachable ();
29284 fprintf (file
, "0x");
29285 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
29289 element
= CONST_VECTOR_ELT (x
, i
);
29290 fprintf (file
, pattern
, INTVAL (element
));
29296 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29297 HFmode constant pool entries are actually loaded with ldr. */
29299 arm_emit_fp16_const (rtx c
)
29303 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
29304 if (WORDS_BIG_ENDIAN
)
29305 assemble_zeros (2);
29306 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
29307 if (!WORDS_BIG_ENDIAN
)
29308 assemble_zeros (2);
29312 arm_output_load_gr (rtx
*operands
)
29319 if (!MEM_P (operands
[1])
29320 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
29321 || !REG_P (reg
= XEXP (sum
, 0))
29322 || !CONST_INT_P (offset
= XEXP (sum
, 1))
29323 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
29324 return "wldrw%?\t%0, %1";
29326 /* Fix up an out-of-range load of a GR register. */
29327 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
29328 wcgr
= operands
[0];
29330 output_asm_insn ("ldr%?\t%0, %1", operands
);
29332 operands
[0] = wcgr
;
29334 output_asm_insn ("tmcr%?\t%0, %1", operands
);
29335 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
29340 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29342 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29343 named arg and all anonymous args onto the stack.
29344 XXX I know the prologue shouldn't be pushing registers, but it is faster
29348 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
29349 const function_arg_info
&arg
,
29351 int second_time ATTRIBUTE_UNUSED
)
29353 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
29356 cfun
->machine
->uses_anonymous_args
= 1;
29357 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
29359 nregs
= pcum
->aapcs_ncrn
;
29360 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl
))
29363 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
29364 if (res
< 0 && warn_psabi
)
29365 inform (input_location
, "parameter passing for argument of "
29366 "type %qT changed in GCC 7.1", arg
.type
);
29370 if (res
> 1 && warn_psabi
)
29371 inform (input_location
,
29372 "parameter passing for argument of type "
29373 "%qT changed in GCC 9.1", arg
.type
);
29378 nregs
= pcum
->nregs
;
29380 if (nregs
< NUM_ARG_REGS
)
29381 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
29384 /* We can't rely on the caller doing the proper promotion when
29385 using APCS or ATPCS. */
29388 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
29390 return !TARGET_AAPCS_BASED
;
29393 static machine_mode
29394 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
29396 int *punsignedp ATTRIBUTE_UNUSED
,
29397 const_tree fntype ATTRIBUTE_UNUSED
,
29398 int for_return ATTRIBUTE_UNUSED
)
29400 if (GET_MODE_CLASS (mode
) == MODE_INT
29401 && GET_MODE_SIZE (mode
) < 4)
29409 arm_default_short_enums (void)
29411 return ARM_DEFAULT_SHORT_ENUMS
;
29415 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29418 arm_align_anon_bitfield (void)
29420 return TARGET_AAPCS_BASED
;
29424 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29427 arm_cxx_guard_type (void)
29429 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
29433 /* The EABI says test the least significant bit of a guard variable. */
29436 arm_cxx_guard_mask_bit (void)
29438 return TARGET_AAPCS_BASED
;
29442 /* The EABI specifies that all array cookies are 8 bytes long. */
29445 arm_get_cookie_size (tree type
)
29449 if (!TARGET_AAPCS_BASED
)
29450 return default_cxx_get_cookie_size (type
);
29452 size
= build_int_cst (sizetype
, 8);
29457 /* The EABI says that array cookies should also contain the element size. */
29460 arm_cookie_has_size (void)
29462 return TARGET_AAPCS_BASED
;
29466 /* The EABI says constructors and destructors should return a pointer to
29467 the object constructed/destroyed. */
29470 arm_cxx_cdtor_returns_this (void)
29472 return TARGET_AAPCS_BASED
;
29475 /* The EABI says that an inline function may never be the key
29479 arm_cxx_key_method_may_be_inline (void)
29481 return !TARGET_AAPCS_BASED
;
29485 arm_cxx_determine_class_data_visibility (tree decl
)
29487 if (!TARGET_AAPCS_BASED
29488 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
29491 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29492 is exported. However, on systems without dynamic vague linkage,
29493 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29494 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
29495 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
29497 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
29498 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
29502 arm_cxx_class_data_always_comdat (void)
29504 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29505 vague linkage if the class has no key function. */
29506 return !TARGET_AAPCS_BASED
;
29510 /* The EABI says __aeabi_atexit should be used to register static
29514 arm_cxx_use_aeabi_atexit (void)
29516 return TARGET_AAPCS_BASED
;
29521 arm_set_return_address (rtx source
, rtx scratch
)
29523 arm_stack_offsets
*offsets
;
29524 HOST_WIDE_INT delta
;
29526 unsigned long saved_regs
;
29528 offsets
= arm_get_frame_offsets ();
29529 saved_regs
= offsets
->saved_regs_mask
;
29531 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
29532 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29535 if (frame_pointer_needed
)
29536 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
29539 /* LR will be the first saved register. */
29540 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
29545 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
29546 GEN_INT (delta
& ~4095)));
29551 addr
= stack_pointer_rtx
;
29553 addr
= plus_constant (Pmode
, addr
, delta
);
29556 /* The store needs to be marked to prevent DSE from deleting
29557 it as dead if it is based on fp. */
29558 mem
= gen_frame_mem (Pmode
, addr
);
29559 MEM_VOLATILE_P (mem
) = true;
29560 emit_move_insn (mem
, source
);
29566 thumb_set_return_address (rtx source
, rtx scratch
)
29568 arm_stack_offsets
*offsets
;
29569 HOST_WIDE_INT delta
;
29570 HOST_WIDE_INT limit
;
29573 unsigned long mask
;
29577 offsets
= arm_get_frame_offsets ();
29578 mask
= offsets
->saved_regs_mask
;
29579 if (mask
& (1 << LR_REGNUM
))
29582 /* Find the saved regs. */
29583 if (frame_pointer_needed
)
29585 delta
= offsets
->soft_frame
- offsets
->saved_args
;
29586 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
29592 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
29595 /* Allow for the stack frame. */
29596 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
29598 /* The link register is always the first saved register. */
29601 /* Construct the address. */
29602 addr
= gen_rtx_REG (SImode
, reg
);
29605 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
29606 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
29610 addr
= plus_constant (Pmode
, addr
, delta
);
29612 /* The store needs to be marked to prevent DSE from deleting
29613 it as dead if it is based on fp. */
29614 mem
= gen_frame_mem (Pmode
, addr
);
29615 MEM_VOLATILE_P (mem
) = true;
29616 emit_move_insn (mem
, source
);
29619 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29622 /* Implements target hook vector_mode_supported_p. */
29624 arm_vector_mode_supported_p (machine_mode mode
)
29626 /* Neon also supports V2SImode, etc. listed in the clause below. */
29627 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
29628 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
29629 || mode
== V2DImode
|| mode
== V8HFmode
|| mode
== V4BFmode
29630 || mode
== V8BFmode
))
29633 if ((TARGET_NEON
|| TARGET_IWMMXT
)
29634 && ((mode
== V2SImode
)
29635 || (mode
== V4HImode
)
29636 || (mode
== V8QImode
)))
29639 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
29640 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
29641 || mode
== V2HAmode
))
29644 if (TARGET_HAVE_MVE
29645 && (VALID_MVE_SI_MODE (mode
) || VALID_MVE_PRED_MODE (mode
)))
29648 if (TARGET_HAVE_MVE_FLOAT
29649 && (mode
== V2DFmode
|| mode
== V4SFmode
|| mode
== V8HFmode
))
29655 /* Implements target hook array_mode_supported_p. */
29658 arm_array_mode_supported_p (machine_mode mode
,
29659 unsigned HOST_WIDE_INT nelems
)
29661 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29662 for now, as the lane-swapping logic needs to be extended in the expanders.
29663 See PR target/82518. */
29664 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
29665 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
29666 && (nelems
>= 2 && nelems
<= 4))
29669 if (TARGET_HAVE_MVE
&& !BYTES_BIG_ENDIAN
29670 && VALID_MVE_MODE (mode
) && (nelems
== 2 || nelems
== 4))
29676 /* Use the option -mvectorize-with-neon-double to override the use of quardword
29677 registers when autovectorizing for Neon, at least until multiple vector
29678 widths are supported properly by the middle-end. */
29680 static machine_mode
29681 arm_preferred_simd_mode (scalar_mode mode
)
29687 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HFmode
: V8HFmode
;
29689 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
29691 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
29693 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
29695 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
29697 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29704 if (TARGET_REALLY_IWMMXT
)
29717 if (TARGET_HAVE_MVE
)
29730 if (TARGET_HAVE_MVE_FLOAT
)
29744 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29746 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29747 using r0-r4 for function arguments, r7 for the stack frame and don't have
29748 enough left over to do doubleword arithmetic. For Thumb-2 all the
29749 potentially problematic instructions accept high registers so this is not
29750 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29751 that require many low registers. */
29753 arm_class_likely_spilled_p (reg_class_t rclass
)
29755 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
29756 || rclass
== CC_REG
)
29759 return default_class_likely_spilled_p (rclass
);
29762 /* Implements target hook small_register_classes_for_mode_p. */
29764 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
29766 return TARGET_THUMB1
;
29769 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29770 ARM insns and therefore guarantee that the shift count is modulo 256.
29771 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29772 guarantee no particular behavior for out-of-range counts. */
29774 static unsigned HOST_WIDE_INT
29775 arm_shift_truncation_mask (machine_mode mode
)
29777 return mode
== SImode
? 255 : 0;
29781 /* Map internal gcc register numbers to DWARF2 register numbers. */
29784 arm_debugger_regno (unsigned int regno
)
29789 if (IS_VFP_REGNUM (regno
))
29791 /* See comment in arm_dwarf_register_span. */
29792 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29793 return 64 + regno
- FIRST_VFP_REGNUM
;
29795 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
29798 if (IS_IWMMXT_GR_REGNUM (regno
))
29799 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
29801 if (IS_IWMMXT_REGNUM (regno
))
29802 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
29804 if (IS_PAC_REGNUM (regno
))
29805 return DWARF_PAC_REGNUM
;
29807 return DWARF_FRAME_REGISTERS
;
29810 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29811 GCC models tham as 64 32-bit registers, so we need to describe this to
29812 the DWARF generation code. Other registers can use the default. */
29814 arm_dwarf_register_span (rtx rtl
)
29822 regno
= REGNO (rtl
);
29823 if (!IS_VFP_REGNUM (regno
))
29826 /* XXX FIXME: The EABI defines two VFP register ranges:
29827 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29829 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29830 corresponding D register. Until GDB supports this, we shall use the
29831 legacy encodings. We also use these encodings for D0-D15 for
29832 compatibility with older debuggers. */
29833 mode
= GET_MODE (rtl
);
29834 if (GET_MODE_SIZE (mode
) < 8)
29837 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29839 nregs
= GET_MODE_SIZE (mode
) / 4;
29840 for (i
= 0; i
< nregs
; i
+= 2)
29841 if (TARGET_BIG_END
)
29843 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29844 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
29848 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
29849 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29854 nregs
= GET_MODE_SIZE (mode
) / 8;
29855 for (i
= 0; i
< nregs
; i
++)
29856 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
29859 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
29862 #if ARM_UNWIND_INFO
29863 /* Emit unwind directives for a store-multiple instruction or stack pointer
29864 push during alignment.
29865 These should only ever be generated by the function prologue code, so
29866 expect them to have a particular form.
29867 The store-multiple instruction sometimes pushes pc as the last register,
29868 although it should not be tracked into unwind information, or for -Os
29869 sometimes pushes some dummy registers before first register that needs
29870 to be tracked in unwind information; such dummy registers are there just
29871 to avoid separate stack adjustment, and will not be restored in the
29875 arm_unwind_emit_sequence (FILE * out_file
, rtx p
)
29878 HOST_WIDE_INT offset
;
29879 HOST_WIDE_INT nregs
;
29883 unsigned padfirst
= 0, padlast
= 0;
29886 e
= XVECEXP (p
, 0, 0);
29887 gcc_assert (GET_CODE (e
) == SET
);
29889 /* First insn will adjust the stack pointer. */
29890 gcc_assert (GET_CODE (e
) == SET
29891 && REG_P (SET_DEST (e
))
29892 && REGNO (SET_DEST (e
)) == SP_REGNUM
29893 && GET_CODE (SET_SRC (e
)) == PLUS
);
29895 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29896 nregs
= XVECLEN (p
, 0) - 1;
29897 gcc_assert (nregs
);
29899 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29900 if (reg
< 16 || IS_PAC_REGNUM (reg
))
29902 /* For -Os dummy registers can be pushed at the beginning to
29903 avoid separate stack pointer adjustment. */
29904 e
= XVECEXP (p
, 0, 1);
29905 e
= XEXP (SET_DEST (e
), 0);
29906 if (GET_CODE (e
) == PLUS
)
29907 padfirst
= INTVAL (XEXP (e
, 1));
29908 gcc_assert (padfirst
== 0 || optimize_size
);
29909 /* The function prologue may also push pc, but not annotate it as it is
29910 never restored. We turn this into a stack pointer adjustment. */
29911 e
= XVECEXP (p
, 0, nregs
);
29912 e
= XEXP (SET_DEST (e
), 0);
29913 if (GET_CODE (e
) == PLUS
)
29914 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29916 padlast
= offset
- 4;
29917 gcc_assert (padlast
== 0 || padlast
== 4);
29919 fprintf (out_file
, "\t.pad #4\n");
29921 fprintf (out_file
, "\t.save {");
29923 else if (IS_VFP_REGNUM (reg
))
29926 fprintf (out_file
, "\t.vsave {");
29929 /* Unknown register type. */
29930 gcc_unreachable ();
29932 /* If the stack increment doesn't match the size of the saved registers,
29933 something has gone horribly wrong. */
29934 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29938 /* The remaining insns will describe the stores. */
29939 for (i
= 1; i
<= nregs
; i
++)
29941 /* Expect (set (mem <addr>) (reg)).
29942 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29943 e
= XVECEXP (p
, 0, i
);
29944 gcc_assert (GET_CODE (e
) == SET
29945 && MEM_P (SET_DEST (e
))
29946 && REG_P (SET_SRC (e
)));
29948 reg
= REGNO (SET_SRC (e
));
29949 gcc_assert (reg
>= lastreg
);
29952 fprintf (out_file
, ", ");
29953 /* We can't use %r for vfp because we need to use the
29954 double precision register names. */
29955 if (IS_VFP_REGNUM (reg
))
29956 asm_fprintf (out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29957 else if (IS_PAC_REGNUM (reg
))
29958 asm_fprintf (asm_out_file
, "ra_auth_code");
29960 asm_fprintf (out_file
, "%r", reg
);
29964 /* Check that the addresses are consecutive. */
29965 e
= XEXP (SET_DEST (e
), 0);
29966 if (GET_CODE (e
) == PLUS
)
29967 gcc_assert (REG_P (XEXP (e
, 0))
29968 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29969 && CONST_INT_P (XEXP (e
, 1))
29970 && offset
== INTVAL (XEXP (e
, 1)));
29974 && REGNO (e
) == SP_REGNUM
);
29975 offset
+= reg_size
;
29978 fprintf (out_file
, "}\n");
29980 fprintf (out_file
, "\t.pad #%d\n", padfirst
);
29983 /* Emit unwind directives for a SET. */
29986 arm_unwind_emit_set (FILE * out_file
, rtx p
)
29994 switch (GET_CODE (e0
))
29997 /* Pushing a single register. */
29998 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29999 || !REG_P (XEXP (XEXP (e0
, 0), 0))
30000 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
30003 asm_fprintf (out_file
, "\t.save ");
30004 if (IS_VFP_REGNUM (REGNO (e1
)))
30005 asm_fprintf(out_file
, "{d%d}\n",
30006 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
30008 asm_fprintf(out_file
, "{%r}\n", REGNO (e1
));
30012 if (REGNO (e0
) == SP_REGNUM
)
30014 /* A stack increment. */
30015 if (GET_CODE (e1
) != PLUS
30016 || !REG_P (XEXP (e1
, 0))
30017 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
30018 || !CONST_INT_P (XEXP (e1
, 1)))
30021 asm_fprintf (out_file
, "\t.pad #%wd\n",
30022 -INTVAL (XEXP (e1
, 1)));
30024 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
30026 HOST_WIDE_INT offset
;
30028 if (GET_CODE (e1
) == PLUS
)
30030 if (!REG_P (XEXP (e1
, 0))
30031 || !CONST_INT_P (XEXP (e1
, 1)))
30033 reg
= REGNO (XEXP (e1
, 0));
30034 offset
= INTVAL (XEXP (e1
, 1));
30035 asm_fprintf (out_file
, "\t.setfp %r, %r, #%wd\n",
30036 HARD_FRAME_POINTER_REGNUM
, reg
,
30039 else if (REG_P (e1
))
30042 asm_fprintf (out_file
, "\t.setfp %r, %r\n",
30043 HARD_FRAME_POINTER_REGNUM
, reg
);
30048 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
30050 /* Move from sp to reg. */
30051 asm_fprintf (out_file
, "\t.movsp %r\n", REGNO (e0
));
30053 else if (GET_CODE (e1
) == PLUS
30054 && REG_P (XEXP (e1
, 0))
30055 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
30056 && CONST_INT_P (XEXP (e1
, 1)))
30058 /* Set reg to offset from sp. */
30059 asm_fprintf (out_file
, "\t.movsp %r, #%d\n",
30060 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
30062 else if (REGNO (e0
) == IP_REGNUM
&& arm_current_function_pac_enabled_p ())
30064 if (cfun
->machine
->pacspval_needed
)
30065 asm_fprintf (out_file
, "\t.pacspval\n");
30077 /* Emit unwind directives for the given insn. */
30080 arm_unwind_emit (FILE * out_file
, rtx_insn
*insn
)
30083 bool handled_one
= false;
30085 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
30088 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
30089 && (TREE_NOTHROW (current_function_decl
)
30090 || crtl
->all_throwers_are_sibcalls
))
30093 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
30096 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
30098 switch (REG_NOTE_KIND (note
))
30100 case REG_FRAME_RELATED_EXPR
:
30101 pat
= XEXP (note
, 0);
30104 case REG_CFA_REGISTER
:
30105 pat
= XEXP (note
, 0);
30108 pat
= PATTERN (insn
);
30109 if (GET_CODE (pat
) == PARALLEL
)
30110 pat
= XVECEXP (pat
, 0, 0);
30113 /* Only emitted for IS_STACKALIGN re-alignment. */
30118 src
= SET_SRC (pat
);
30119 dest
= SET_DEST (pat
);
30121 gcc_assert (src
== stack_pointer_rtx
30122 || IS_PAC_REGNUM (REGNO (src
)));
30123 reg
= REGNO (dest
);
30125 if (IS_PAC_REGNUM (REGNO (src
)))
30126 arm_unwind_emit_set (out_file
, PATTERN (insn
));
30128 asm_fprintf (out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30131 handled_one
= true;
30134 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
30135 to get correct dwarf information for shrink-wrap. We should not
30136 emit unwind information for it because these are used either for
30137 pretend arguments or notes to adjust sp and restore registers from
30139 case REG_CFA_DEF_CFA
:
30140 case REG_CFA_ADJUST_CFA
:
30141 case REG_CFA_RESTORE
:
30144 case REG_CFA_EXPRESSION
:
30145 case REG_CFA_OFFSET
:
30146 /* ??? Only handling here what we actually emit. */
30147 gcc_unreachable ();
30155 pat
= PATTERN (insn
);
30158 switch (GET_CODE (pat
))
30161 arm_unwind_emit_set (out_file
, pat
);
30165 /* Store multiple. */
30166 arm_unwind_emit_sequence (out_file
, pat
);
30175 /* Output a reference from a function exception table to the type_info
30176 object X. The EABI specifies that the symbol should be relocated by
30177 an R_ARM_TARGET2 relocation. */
30180 arm_output_ttype (rtx x
)
30182 fputs ("\t.word\t", asm_out_file
);
30183 output_addr_const (asm_out_file
, x
);
30184 /* Use special relocations for symbol references. */
30185 if (!CONST_INT_P (x
))
30186 fputs ("(TARGET2)", asm_out_file
);
30187 fputc ('\n', asm_out_file
);
30192 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30195 arm_asm_emit_except_personality (rtx personality
)
30197 fputs ("\t.personality\t", asm_out_file
);
30198 output_addr_const (asm_out_file
, personality
);
30199 fputc ('\n', asm_out_file
);
30201 #endif /* ARM_UNWIND_INFO */
30203 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30206 arm_asm_init_sections (void)
30208 #if ARM_UNWIND_INFO
30209 exception_section
= get_unnamed_section (0, output_section_asm_op
,
30211 #endif /* ARM_UNWIND_INFO */
30213 #ifdef OBJECT_FORMAT_ELF
30214 if (target_pure_code
)
30215 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
30219 /* Output unwind directives for the start/end of a function. */
30222 arm_output_fn_unwind (FILE * f
, bool prologue
)
30224 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
30228 fputs ("\t.fnstart\n", f
);
30231 /* If this function will never be unwound, then mark it as such.
30232 The came condition is used in arm_unwind_emit to suppress
30233 the frame annotations. */
30234 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
30235 && (TREE_NOTHROW (current_function_decl
)
30236 || crtl
->all_throwers_are_sibcalls
))
30237 fputs("\t.cantunwind\n", f
);
30239 fputs ("\t.fnend\n", f
);
30244 arm_emit_tls_decoration (FILE *fp
, rtx x
)
30246 enum tls_reloc reloc
;
30249 val
= XVECEXP (x
, 0, 0);
30250 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
30252 output_addr_const (fp
, val
);
30257 fputs ("(tlsgd)", fp
);
30259 case TLS_GD32_FDPIC
:
30260 fputs ("(tlsgd_fdpic)", fp
);
30263 fputs ("(tlsldm)", fp
);
30265 case TLS_LDM32_FDPIC
:
30266 fputs ("(tlsldm_fdpic)", fp
);
30269 fputs ("(tlsldo)", fp
);
30272 fputs ("(gottpoff)", fp
);
30274 case TLS_IE32_FDPIC
:
30275 fputs ("(gottpoff_fdpic)", fp
);
30278 fputs ("(tpoff)", fp
);
30281 fputs ("(tlsdesc)", fp
);
30284 gcc_unreachable ();
30293 fputs (" + (. - ", fp
);
30294 output_addr_const (fp
, XVECEXP (x
, 0, 2));
30295 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30296 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
30297 output_addr_const (fp
, XVECEXP (x
, 0, 3));
30307 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30310 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
30312 gcc_assert (size
== 4);
30313 fputs ("\t.word\t", file
);
30314 output_addr_const (file
, x
);
30315 fputs ("(tlsldo)", file
);
30318 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30321 arm_output_addr_const_extra (FILE *fp
, rtx x
)
30323 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
30324 return arm_emit_tls_decoration (fp
, x
);
30325 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
30328 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
30330 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
30331 assemble_name_raw (fp
, label
);
30335 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
30337 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
30341 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30345 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
30347 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30351 output_addr_const (fp
, XVECEXP (x
, 0, 1));
30355 else if (GET_CODE (x
) == CONST_VECTOR
)
30356 return arm_emit_vector_const (fp
, x
);
30361 /* Output assembly for a shift instruction.
30362 SET_FLAGS determines how the instruction modifies the condition codes.
30363 0 - Do not set condition codes.
30364 1 - Set condition codes.
30365 2 - Use smallest instruction. */
30367 arm_output_shift(rtx
* operands
, int set_flags
)
30370 static const char flag_chars
[3] = {'?', '.', '!'};
30375 c
= flag_chars
[set_flags
];
30376 shift
= shift_op(operands
[3], &val
);
30380 operands
[2] = GEN_INT(val
);
30381 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
30384 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
30386 output_asm_insn (pattern
, operands
);
30390 /* Output assembly for a WMMX immediate shift instruction. */
30392 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
30394 int shift
= INTVAL (operands
[2]);
30396 machine_mode opmode
= GET_MODE (operands
[0]);
30398 gcc_assert (shift
>= 0);
30400 /* If the shift value in the register versions is > 63 (for D qualifier),
30401 31 (for W qualifier) or 15 (for H qualifier). */
30402 if (((opmode
== V4HImode
) && (shift
> 15))
30403 || ((opmode
== V2SImode
) && (shift
> 31))
30404 || ((opmode
== DImode
) && (shift
> 63)))
30408 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30409 output_asm_insn (templ
, operands
);
30410 if (opmode
== DImode
)
30412 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
30413 output_asm_insn (templ
, operands
);
30418 /* The destination register will contain all zeros. */
30419 sprintf (templ
, "wzero\t%%0");
30420 output_asm_insn (templ
, operands
);
30425 if ((opmode
== DImode
) && (shift
> 32))
30427 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30428 output_asm_insn (templ
, operands
);
30429 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
30430 output_asm_insn (templ
, operands
);
30434 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
30435 output_asm_insn (templ
, operands
);
30440 /* Output assembly for a WMMX tinsr instruction. */
30442 arm_output_iwmmxt_tinsr (rtx
*operands
)
30444 int mask
= INTVAL (operands
[3]);
30447 int units
= mode_nunits
[GET_MODE (operands
[0])];
30448 gcc_assert ((mask
& (mask
- 1)) == 0);
30449 for (i
= 0; i
< units
; ++i
)
30451 if ((mask
& 0x01) == 1)
30457 gcc_assert (i
< units
);
30459 switch (GET_MODE (operands
[0]))
30462 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
30465 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
30468 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
30471 gcc_unreachable ();
30474 output_asm_insn (templ
, operands
);
30479 /* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
30480 Responsible for the handling of switch statements in arm. */
30482 arm_output_casesi (rtx
*operands
)
30485 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
30486 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30487 output_asm_insn ("cmp\t%0, %1", operands
);
30488 output_asm_insn ("bhi\t%l3", operands
);
30489 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
30490 switch (GET_MODE (diff_vec
))
30493 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30494 output_asm_insn ("ldrb\t%4, [%5, %0]", operands
);
30496 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands
);
30497 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands
);
30500 if (REGNO (operands
[4]) != REGNO (operands
[5]))
30502 output_asm_insn ("add\t%4, %0, %0", operands
);
30503 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30504 output_asm_insn ("ldrh\t%4, [%5, %4]", operands
);
30506 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands
);
30510 output_asm_insn ("add\t%4, %5, %0", operands
);
30511 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30512 output_asm_insn ("ldrh\t%4, [%4, %0]", operands
);
30514 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands
);
30516 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands
);
30521 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands
);
30522 output_asm_insn ("add\t%|pc, %|pc, %4", operands
);
30525 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands
);
30528 gcc_unreachable ();
30530 assemble_label (asm_out_file
, label
);
30531 output_asm_insn ("nop", operands
);
30535 /* Output a Thumb-1 casesi dispatch sequence. */
30537 thumb1_output_casesi (rtx
*operands
)
30539 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
30541 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30543 switch (GET_MODE(diff_vec
))
30546 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30547 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30549 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30550 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30552 return "bl\t%___gnu_thumb1_case_si";
30554 gcc_unreachable ();
30558 /* Output a Thumb-2 casesi instruction. */
30560 thumb2_output_casesi (rtx
*operands
)
30562 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
30564 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30566 output_asm_insn ("cmp\t%0, %1", operands
);
30567 output_asm_insn ("bhi\t%l3", operands
);
30568 switch (GET_MODE(diff_vec
))
30571 return "tbb\t[%|pc, %0]";
30573 return "tbh\t[%|pc, %0, lsl #1]";
30577 output_asm_insn ("adr\t%4, %l2", operands
);
30578 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
30579 output_asm_insn ("add\t%4, %4, %5", operands
);
30584 output_asm_insn ("adr\t%4, %l2", operands
);
30585 return "ldr\t%|pc, [%4, %0, lsl #2]";
30588 gcc_unreachable ();
30592 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30593 per-core tuning structs. */
30595 arm_issue_rate (void)
30597 return current_tune
->issue_rate
;
30600 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30602 arm_sched_variable_issue (FILE *, int, rtx_insn
*insn
, int more
)
30604 if (DEBUG_INSN_P (insn
))
30607 rtx_code code
= GET_CODE (PATTERN (insn
));
30608 if (code
== USE
|| code
== CLOBBER
)
30611 if (get_attr_type (insn
) == TYPE_NO_INSN
)
30617 /* Return how many instructions should scheduler lookahead to choose the
30620 arm_first_cycle_multipass_dfa_lookahead (void)
30622 int issue_rate
= arm_issue_rate ();
30624 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
30627 /* Enable modeling of L2 auto-prefetcher. */
30629 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
30631 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
30635 arm_mangle_type (const_tree type
)
30637 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30638 has to be managled as if it is in the "std" namespace. */
30639 if (TARGET_AAPCS_BASED
30640 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
30641 return "St9__va_list";
30643 /* Half-precision floating point types. */
30644 if (SCALAR_FLOAT_TYPE_P (type
) && TYPE_PRECISION (type
) == 16)
30646 if (TYPE_MAIN_VARIANT (type
) == float16_type_node
)
30648 if (TYPE_MODE (type
) == BFmode
)
30654 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30656 if (TYPE_NAME (type
) != NULL
)
30657 return arm_mangle_builtin_type (type
);
30659 /* Use the default mangling. */
30663 /* Order of allocation of core registers for Thumb: this allocation is
30664 written over the corresponding initial entries of the array
30665 initialized with REG_ALLOC_ORDER. We allocate all low registers
30666 first. Saving and restoring a low register is usually cheaper than
30667 using a call-clobbered high register. */
30669 static const int thumb_core_reg_alloc_order
[] =
30671 3, 2, 1, 0, 4, 5, 6, 7,
30672 12, 14, 8, 9, 10, 11
30675 /* Adjust register allocation order when compiling for Thumb. */
30678 arm_order_regs_for_local_alloc (void)
30680 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
30681 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
30683 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
30684 sizeof (thumb_core_reg_alloc_order
));
30687 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30690 arm_frame_pointer_required (void)
30692 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
30695 /* If the function receives nonlocal gotos, it needs to save the frame
30696 pointer in the nonlocal_goto_save_area object. */
30697 if (cfun
->has_nonlocal_label
)
30700 /* The frame pointer is required for non-leaf APCS frames. */
30701 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
30704 /* If we are probing the stack in the prologue, we will have a faulting
30705 instruction prior to the stack adjustment and this requires a frame
30706 pointer if we want to catch the exception using the EABI unwinder. */
30707 if (!IS_INTERRUPT (arm_current_func_type ())
30708 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
30709 || flag_stack_clash_protection
)
30710 && arm_except_unwind_info (&global_options
) == UI_TARGET
30711 && cfun
->can_throw_non_call_exceptions
)
30713 HOST_WIDE_INT size
= get_frame_size ();
30715 /* That's irrelevant if there is no stack adjustment. */
30719 /* That's relevant only if there is a stack probe. */
30720 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
30722 /* We don't have the final size of the frame so adjust. */
30723 size
+= 32 * UNITS_PER_WORD
;
30724 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
30734 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30735 All modes except THUMB1 have conditional execution.
30736 If we have conditional arithmetic, return false before reload to
30737 enable some ifcvt transformations. */
30739 arm_have_conditional_execution (void)
30741 bool has_cond_exec
, enable_ifcvt_trans
;
30743 /* Only THUMB1 cannot support conditional execution. */
30744 has_cond_exec
= !TARGET_THUMB1
;
30746 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30748 enable_ifcvt_trans
= TARGET_COND_ARITH
&& !reload_completed
;
30750 return has_cond_exec
&& !enable_ifcvt_trans
;
30753 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30754 static HOST_WIDE_INT
30755 arm_vector_alignment (const_tree type
)
30757 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
30759 if (TARGET_AAPCS_BASED
)
30760 align
= MIN (align
, 64);
30765 static unsigned int
30766 arm_autovectorize_vector_modes (vector_modes
*modes
, bool)
30768 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
30770 modes
->safe_push (V16QImode
);
30771 modes
->safe_push (V8QImode
);
30777 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
30779 /* Vectors which aren't in packed structures will not be less aligned than
30780 the natural alignment of their element type, so this is safe. */
30781 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30784 return default_builtin_vector_alignment_reachable (type
, is_packed
);
30788 arm_builtin_support_vector_misalignment (machine_mode mode
,
30789 const_tree type
, int misalignment
,
30792 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30794 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
30799 /* If the misalignment is unknown, we should be able to handle the access
30800 so long as it is not to a member of a packed data structure. */
30801 if (misalignment
== -1)
30804 /* Return true if the misalignment is a multiple of the natural alignment
30805 of the vector's element type. This is probably always going to be
30806 true in practice, since we've already established that this isn't a
30808 return ((misalignment
% align
) == 0);
30811 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
30816 arm_conditional_register_usage (void)
30820 if (TARGET_THUMB1
&& optimize_size
)
30822 /* When optimizing for size on Thumb-1, it's better not
30823 to use the HI regs, because of the overhead of
30825 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
30826 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
30829 /* The link register can be clobbered by any branch insn,
30830 but we have no way to track that at present, so mark
30831 it as unavailable. */
30833 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
30835 if (TARGET_32BIT
&& TARGET_VFP_BASE
)
30837 /* VFPv3 registers are disabled when earlier VFP
30838 versions are selected due to the definition of
30839 LAST_VFP_REGNUM. */
30840 for (regno
= FIRST_VFP_REGNUM
;
30841 regno
<= LAST_VFP_REGNUM
; ++ regno
)
30843 fixed_regs
[regno
] = 0;
30844 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
30845 || regno
>= FIRST_VFP_REGNUM
+ 32;
30847 if (TARGET_HAVE_MVE
)
30848 fixed_regs
[VPR_REGNUM
] = 0;
30851 if (TARGET_REALLY_IWMMXT
&& !TARGET_GENERAL_REGS_ONLY
)
30853 regno
= FIRST_IWMMXT_GR_REGNUM
;
30854 /* The 2002/10/09 revision of the XScale ABI has wCG0
30855 and wCG1 as call-preserved registers. The 2002/11/21
30856 revision changed this so that all wCG registers are
30857 scratch registers. */
30858 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
30859 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
30860 fixed_regs
[regno
] = 0;
30861 /* The XScale ABI has wR0 - wR9 as scratch registers,
30862 the rest as call-preserved registers. */
30863 for (regno
= FIRST_IWMMXT_REGNUM
;
30864 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
30866 fixed_regs
[regno
] = 0;
30867 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
30871 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
30873 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30874 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30876 else if (TARGET_APCS_STACK
)
30878 fixed_regs
[10] = 1;
30879 call_used_regs
[10] = 1;
30881 /* -mcaller-super-interworking reserves r11 for calls to
30882 _interwork_r11_call_via_rN(). Making the register global
30883 is an easy way of ensuring that it remains valid for all
30885 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
30886 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
30888 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30889 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30890 if (TARGET_CALLER_INTERWORKING
)
30891 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30894 /* The Q and GE bits are only accessed via special ACLE patterns. */
30895 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRQ_REGNUM
);
30896 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRGE_REGNUM
);
30898 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30902 arm_preferred_rename_class (reg_class_t rclass
)
30904 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30905 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
30906 and code size can be reduced. */
30907 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
30913 /* Compute the attribute "length" of insn "*push_multi".
30914 So this function MUST be kept in sync with that insn pattern. */
30916 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
30918 int i
, regno
, hi_reg
;
30919 int num_saves
= XVECLEN (parallel_op
, 0);
30929 regno
= REGNO (first_op
);
30930 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
30931 list is 8-bit. Normally this means all registers in the list must be
30932 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
30933 encodings. There is one exception for PUSH that LR in HI_REGS can be used
30934 with 16-bit encoding. */
30935 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30936 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30938 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30939 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30947 /* Compute the attribute "length" of insn. Currently, this function is used
30948 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30949 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30950 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
30951 true if OPERANDS contains insn which explicit updates base register. */
30954 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
30963 rtx parallel_op
= operands
[0];
30964 /* Initialize to elements number of PARALLEL. */
30965 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
30966 /* Initialize the value to base register. */
30967 unsigned regno
= REGNO (operands
[1]);
30968 /* Skip return and write back pattern.
30969 We only need register pop pattern for later analysis. */
30970 unsigned first_indx
= 0;
30971 first_indx
+= return_pc
? 1 : 0;
30972 first_indx
+= write_back_p
? 1 : 0;
30974 /* A pop operation can be done through LDM or POP. If the base register is SP
30975 and if it's with write back, then a LDM will be alias of POP. */
30976 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
30977 bool ldm_p
= !pop_p
;
30979 /* Check base register for LDM. */
30980 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
30983 /* Check each register in the list. */
30984 for (; indx
>= first_indx
; indx
--)
30986 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
30987 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30988 comment in arm_attr_length_push_multi. */
30989 if (REGNO_REG_CLASS (regno
) == HI_REGS
30990 && (regno
!= PC_REGNUM
|| ldm_p
))
30997 /* Compute the number of instructions emitted by output_move_double. */
30999 arm_count_output_move_double_insns (rtx
*operands
)
31003 /* output_move_double may modify the operands array, so call it
31004 here on a copy of the array. */
31005 ops
[0] = operands
[0];
31006 ops
[1] = operands
[1];
31007 output_move_double (ops
, false, &count
);
31011 /* Same as above, but operands are a register/memory pair in SImode.
31012 Assumes operands has the base register in position 0 and memory in position
31013 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31015 arm_count_ldrdstrd_insns (rtx
*operands
, bool load
)
31019 int regnum
, memnum
;
31021 regnum
= 0, memnum
= 1;
31023 regnum
= 1, memnum
= 0;
31024 ops
[regnum
] = gen_rtx_REG (DImode
, REGNO (operands
[0]));
31025 ops
[memnum
] = adjust_address (operands
[2], DImode
, 0);
31026 output_move_double (ops
, false, &count
);
31032 vfp3_const_double_for_fract_bits (rtx operand
)
31034 REAL_VALUE_TYPE r0
;
31036 if (!CONST_DOUBLE_P (operand
))
31039 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
31040 if (exact_real_inverse (DFmode
, &r0
)
31041 && !REAL_VALUE_NEGATIVE (r0
))
31043 if (exact_real_truncate (DFmode
, &r0
))
31045 HOST_WIDE_INT value
= real_to_integer (&r0
);
31046 value
= value
& 0xffffffff;
31047 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
31049 int ret
= exact_log2 (value
);
31050 gcc_assert (IN_RANGE (ret
, 0, 31));
31058 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31059 log2 is in [1, 32], return that log2. Otherwise return -1.
31060 This is used in the patterns for vcvt.s32.f32 floating-point to
31061 fixed-point conversions. */
31064 vfp3_const_double_for_bits (rtx x
)
31066 const REAL_VALUE_TYPE
*r
;
31068 if (!CONST_DOUBLE_P (x
))
31071 r
= CONST_DOUBLE_REAL_VALUE (x
);
31073 if (REAL_VALUE_NEGATIVE (*r
)
31074 || REAL_VALUE_ISNAN (*r
)
31075 || REAL_VALUE_ISINF (*r
)
31076 || !real_isinteger (r
, SFmode
))
31079 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
31081 /* The exact_log2 above will have returned -1 if this is
31082 not an exact log2. */
31083 if (!IN_RANGE (hwint
, 1, 32))
31090 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31093 arm_pre_atomic_barrier (enum memmodel model
)
31095 if (need_atomic_barrier_p (model
, true))
31096 emit_insn (gen_memory_barrier ());
31100 arm_post_atomic_barrier (enum memmodel model
)
31102 if (need_atomic_barrier_p (model
, false))
31103 emit_insn (gen_memory_barrier ());
31106 /* Emit the load-exclusive and store-exclusive instructions.
31107 Use acquire and release versions if necessary. */
31110 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
31112 rtx (*gen
) (rtx
, rtx
);
31118 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
31119 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
31120 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
31121 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
31123 gcc_unreachable ();
31130 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
31131 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
31132 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
31133 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
31135 gcc_unreachable ();
31139 emit_insn (gen (rval
, mem
));
31143 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
31146 rtx (*gen
) (rtx
, rtx
, rtx
);
31152 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
31153 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
31154 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
31155 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
31157 gcc_unreachable ();
31164 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
31165 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
31166 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
31167 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
31169 gcc_unreachable ();
31173 emit_insn (gen (bval
, rval
, mem
));
31176 /* Mark the previous jump instruction as unlikely. */
31179 emit_unlikely_jump (rtx insn
)
31181 rtx_insn
*jump
= emit_jump_insn (insn
);
31182 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
31185 /* Expand a compare and swap pattern. */
31188 arm_expand_compare_and_swap (rtx operands
[])
31190 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
31191 machine_mode mode
, cmp_mode
;
31193 bval
= operands
[0];
31194 rval
= operands
[1];
31196 oldval
= operands
[3];
31197 newval
= operands
[4];
31198 is_weak
= operands
[5];
31199 mod_s
= operands
[6];
31200 mod_f
= operands
[7];
31201 mode
= GET_MODE (mem
);
31203 /* Normally the succ memory model must be stronger than fail, but in the
31204 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31205 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31207 if (TARGET_HAVE_LDACQ
31208 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
31209 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
31210 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
31216 /* For narrow modes, we're going to perform the comparison in SImode,
31217 so do the zero-extension now. */
31218 rval
= gen_reg_rtx (SImode
);
31219 oldval
= convert_modes (SImode
, mode
, oldval
, true);
31223 /* Force the value into a register if needed. We waited until after
31224 the zero-extension above to do this properly. */
31225 if (!arm_add_operand (oldval
, SImode
))
31226 oldval
= force_reg (SImode
, oldval
);
31230 if (!cmpdi_operand (oldval
, mode
))
31231 oldval
= force_reg (mode
, oldval
);
31235 gcc_unreachable ();
31239 cmp_mode
= E_SImode
;
31241 cmp_mode
= CC_Zmode
;
31243 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
31244 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode
, mode
, bdst
, rval
, mem
,
31245 oldval
, newval
, is_weak
, mod_s
, mod_f
));
31247 if (mode
== QImode
|| mode
== HImode
)
31248 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
31250 /* In all cases, we arrange for success to be signaled by Z set.
31251 This arrangement allows for the boolean result to be used directly
31252 in a subsequent branch, post optimization. For Thumb-1 targets, the
31253 boolean negation of the result is also stored in bval because Thumb-1
31254 backend lacks dependency tracking for CC flag due to flag-setting not
31255 being represented at RTL level. */
31257 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
31260 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
31261 emit_insn (gen_rtx_SET (bval
, x
));
31265 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31266 another memory store between the load-exclusive and store-exclusive can
31267 reset the monitor from Exclusive to Open state. This means we must wait
31268 until after reload to split the pattern, lest we get a register spill in
31269 the middle of the atomic sequence. Success of the compare and swap is
31270 indicated by the Z flag set for 32bit targets and by neg_bval being zero
31271 for Thumb-1 targets (ie. negation of the boolean value returned by
31272 atomic_compare_and_swapmode standard pattern in operand 0). */
31275 arm_split_compare_and_swap (rtx operands
[])
31277 rtx rval
, mem
, oldval
, newval
, neg_bval
, mod_s_rtx
;
31279 enum memmodel mod_s
, mod_f
;
31281 rtx_code_label
*label1
, *label2
;
31284 rval
= operands
[1];
31286 oldval
= operands
[3];
31287 newval
= operands
[4];
31288 is_weak
= (operands
[5] != const0_rtx
);
31289 mod_s_rtx
= operands
[6];
31290 mod_s
= memmodel_from_int (INTVAL (mod_s_rtx
));
31291 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
31292 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
31293 mode
= GET_MODE (mem
);
31295 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
31297 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (mod_s_rtx
);
31298 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (mod_s_rtx
);
31300 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31301 a full barrier is emitted after the store-release. */
31303 use_acquire
= false;
31305 /* Checks whether a barrier is needed and emits one accordingly. */
31306 if (!(use_acquire
|| use_release
))
31307 arm_pre_atomic_barrier (mod_s
);
31312 label1
= gen_label_rtx ();
31313 emit_label (label1
);
31315 label2
= gen_label_rtx ();
31317 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
31319 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31320 as required to communicate with arm_expand_compare_and_swap. */
31323 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
31324 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31325 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
31326 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
31327 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
31331 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
31332 if (thumb1_cmpneg_operand (oldval
, SImode
))
31335 if (!satisfies_constraint_L (oldval
))
31337 gcc_assert (satisfies_constraint_J (oldval
));
31339 /* For such immediates, ADDS needs the source and destination regs
31342 Normally this would be handled by RA, but this is all happening
31344 emit_move_insn (neg_bval
, rval
);
31348 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval
, src
, oldval
,
31353 emit_move_insn (neg_bval
, const1_rtx
);
31354 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
31358 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
31360 /* Weak or strong, we want EQ to be true for success, so that we
31361 match the flags that we got from the compare above. */
31364 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
31365 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
31366 emit_insn (gen_rtx_SET (cond
, x
));
31371 /* Z is set to boolean value of !neg_bval, as required to communicate
31372 with arm_expand_compare_and_swap. */
31373 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
31374 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
31377 if (!is_mm_relaxed (mod_f
))
31378 emit_label (label2
);
31380 /* Checks whether a barrier is needed and emits one accordingly. */
31382 || !(use_acquire
|| use_release
))
31383 arm_post_atomic_barrier (mod_s
);
31385 if (is_mm_relaxed (mod_f
))
31386 emit_label (label2
);
31389 /* Split an atomic operation pattern. Operation is given by CODE and is one
31390 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31391 operation). Operation is performed on the content at MEM and on VALUE
31392 following the memory model MODEL_RTX. The content at MEM before and after
31393 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31394 success of the operation is returned in COND. Using a scratch register or
31395 an operand register for these determines what result is returned for that
31399 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
31400 rtx value
, rtx model_rtx
, rtx cond
)
31402 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
31403 machine_mode mode
= GET_MODE (mem
);
31404 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
31405 rtx_code_label
*label
;
31406 bool all_low_regs
, bind_old_new
;
31409 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
31411 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (model_rtx
);
31412 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (model_rtx
);
31414 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31415 a full barrier is emitted after the store-release. */
31417 use_acquire
= false;
31419 /* Checks whether a barrier is needed and emits one accordingly. */
31420 if (!(use_acquire
|| use_release
))
31421 arm_pre_atomic_barrier (model
);
31423 label
= gen_label_rtx ();
31424 emit_label (label
);
31427 new_out
= gen_lowpart (wmode
, new_out
);
31429 old_out
= gen_lowpart (wmode
, old_out
);
31432 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
31434 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
31436 /* Does the operation require destination and first operand to use the same
31437 register? This is decided by register constraints of relevant insn
31438 patterns in thumb1.md. */
31439 gcc_assert (!new_out
|| REG_P (new_out
));
31440 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
31441 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
31442 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
31447 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
31449 /* We want to return the old value while putting the result of the operation
31450 in the same register as the old value so copy the old value over to the
31451 destination register and use that register for the operation. */
31452 if (old_out
&& bind_old_new
)
31454 emit_move_insn (new_out
, old_out
);
31465 x
= gen_rtx_AND (wmode
, old_out
, value
);
31466 emit_insn (gen_rtx_SET (new_out
, x
));
31467 x
= gen_rtx_NOT (wmode
, new_out
);
31468 emit_insn (gen_rtx_SET (new_out
, x
));
31472 if (CONST_INT_P (value
))
31474 value
= gen_int_mode (-INTVAL (value
), wmode
);
31480 if (mode
== DImode
)
31482 /* DImode plus/minus need to clobber flags. */
31483 /* The adddi3 and subdi3 patterns are incorrectly written so that
31484 they require matching operands, even when we could easily support
31485 three operands. Thankfully, this can be fixed up post-splitting,
31486 as the individual add+adc patterns do accept three operands and
31487 post-reload cprop can make these moves go away. */
31488 emit_move_insn (new_out
, old_out
);
31490 x
= gen_adddi3 (new_out
, new_out
, value
);
31492 x
= gen_subdi3 (new_out
, new_out
, value
);
31499 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
31500 emit_insn (gen_rtx_SET (new_out
, x
));
31504 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
31507 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31508 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
31510 /* Checks whether a barrier is needed and emits one accordingly. */
31512 || !(use_acquire
|| use_release
))
31513 arm_post_atomic_barrier (model
);
31516 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31518 arm_mode_to_pred_mode (machine_mode mode
)
31520 switch (GET_MODE_NUNITS (mode
))
31522 case 16: return V16BImode
;
31523 case 8: return V8BImode
;
31524 case 4: return V4BImode
;
31525 case 2: return V2QImode
;
31527 return opt_machine_mode ();
31530 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31531 If CAN_INVERT, store either the result or its inverse in TARGET
31532 and return true if TARGET contains the inverse. If !CAN_INVERT,
31533 always store the result in TARGET, never its inverse.
31535 Note that the handling of floating-point comparisons is not
31539 arm_expand_vector_compare (rtx target
, rtx_code code
, rtx op0
, rtx op1
,
31542 machine_mode cmp_result_mode
= GET_MODE (target
);
31543 machine_mode cmp_mode
= GET_MODE (op0
);
31547 /* MVE supports more comparisons than Neon. */
31548 if (TARGET_HAVE_MVE
)
31553 /* For these we need to compute the inverse of the requested
31562 code
= reverse_condition_maybe_unordered (code
);
31565 /* Recursively emit the inverted comparison into a temporary
31566 and then store its inverse in TARGET. This avoids reusing
31567 TARGET (which for integer NE could be one of the inputs). */
31568 rtx tmp
= gen_reg_rtx (cmp_result_mode
);
31569 if (arm_expand_vector_compare (tmp
, code
, op0
, op1
, true))
31570 gcc_unreachable ();
31571 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (cmp_result_mode
, tmp
)));
31584 /* These are natively supported by Neon for zero comparisons, but otherwise
31585 require the operands to be swapped. For MVE, we can only compare
31589 if (!TARGET_HAVE_MVE
)
31590 if (op1
!= CONST0_RTX (cmp_mode
))
31592 code
= swap_condition (code
);
31593 std::swap (op0
, op1
);
31595 /* Fall through. */
31597 /* These are natively supported by Neon for both register and zero
31598 operands. MVE supports registers only. */
31603 if (TARGET_HAVE_MVE
)
31605 switch (GET_MODE_CLASS (cmp_mode
))
31607 case MODE_VECTOR_INT
:
31608 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31609 op0
, force_reg (cmp_mode
, op1
)));
31611 case MODE_VECTOR_FLOAT
:
31612 if (TARGET_HAVE_MVE_FLOAT
)
31613 emit_insn (gen_mve_vcmpq_f (code
, cmp_mode
, target
,
31614 op0
, force_reg (cmp_mode
, op1
)));
31616 gcc_unreachable ();
31619 gcc_unreachable ();
31623 emit_insn (gen_neon_vc (code
, cmp_mode
, target
, op0
, op1
));
31626 /* These are natively supported for register operands only.
31627 Comparisons with zero aren't useful and should be folded
31628 or canonicalized by target-independent code. */
31631 if (TARGET_HAVE_MVE
)
31632 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31633 op0
, force_reg (cmp_mode
, op1
)));
31635 emit_insn (gen_neon_vc (code
, cmp_mode
, target
,
31636 op0
, force_reg (cmp_mode
, op1
)));
31639 /* These require the operands to be swapped and likewise do not
31640 support comparisons with zero. */
31643 if (TARGET_HAVE_MVE
)
31644 emit_insn (gen_mve_vcmpq (swap_condition (code
), cmp_mode
, target
,
31645 force_reg (cmp_mode
, op1
), op0
));
31647 emit_insn (gen_neon_vc (swap_condition (code
), cmp_mode
,
31648 target
, force_reg (cmp_mode
, op1
), op0
));
31651 /* These need a combination of two comparisons. */
31655 /* Operands are LTGT iff (a > b || a > b).
31656 Operands are ORDERED iff (a > b || a <= b). */
31657 rtx gt_res
= gen_reg_rtx (cmp_result_mode
);
31658 rtx alt_res
= gen_reg_rtx (cmp_result_mode
);
31659 rtx_code alt_code
= (code
== LTGT
? LT
: LE
);
31660 if (arm_expand_vector_compare (gt_res
, GT
, op0
, op1
, true)
31661 || arm_expand_vector_compare (alt_res
, alt_code
, op0
, op1
, true))
31662 gcc_unreachable ();
31663 emit_insn (gen_rtx_SET (target
, gen_rtx_IOR (cmp_result_mode
,
31664 gt_res
, alt_res
)));
31669 gcc_unreachable ();
31673 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31674 CMP_RESULT_MODE is the mode of the comparison result. */
31677 arm_expand_vcond (rtx
*operands
, machine_mode cmp_result_mode
)
31679 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31680 arm_expand_vector_compare, and another one here. */
31683 if (TARGET_HAVE_MVE
)
31684 mask
= gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode
).require ());
31686 mask
= gen_reg_rtx (cmp_result_mode
);
31688 bool inverted
= arm_expand_vector_compare (mask
, GET_CODE (operands
[3]),
31689 operands
[4], operands
[5], true);
31691 std::swap (operands
[1], operands
[2]);
31693 emit_insn (gen_neon_vbsl (GET_MODE (operands
[0]), operands
[0],
31694 mask
, operands
[1], operands
[2]));
31697 machine_mode cmp_mode
= GET_MODE (operands
[0]);
31699 switch (GET_MODE_CLASS (cmp_mode
))
31701 case MODE_VECTOR_INT
:
31702 emit_insn (gen_mve_q (VPSELQ_S
, VPSELQ_S
, cmp_mode
, operands
[0],
31703 operands
[1], operands
[2], mask
));
31705 case MODE_VECTOR_FLOAT
:
31706 if (TARGET_HAVE_MVE_FLOAT
)
31707 emit_insn (gen_mve_q_f (VPSELQ_F
, cmp_mode
, operands
[0],
31708 operands
[1], operands
[2], mask
));
31710 gcc_unreachable ();
31713 gcc_unreachable ();
31718 #define MAX_VECT_LEN 16
31720 struct expand_vec_perm_d
31722 rtx target
, op0
, op1
;
31723 vec_perm_indices perm
;
31724 machine_mode vmode
;
31729 /* Generate a variable permutation. */
31732 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31734 machine_mode vmode
= GET_MODE (target
);
31735 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31737 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
31738 gcc_checking_assert (GET_MODE (op0
) == vmode
);
31739 gcc_checking_assert (GET_MODE (op1
) == vmode
);
31740 gcc_checking_assert (GET_MODE (sel
) == vmode
);
31741 gcc_checking_assert (TARGET_NEON
);
31745 if (vmode
== V8QImode
)
31746 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
31748 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
31754 if (vmode
== V8QImode
)
31756 pair
= gen_reg_rtx (V16QImode
);
31757 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
31758 pair
= gen_lowpart (TImode
, pair
);
31759 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
31763 pair
= gen_reg_rtx (OImode
);
31764 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
31765 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
31771 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31773 machine_mode vmode
= GET_MODE (target
);
31774 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
31775 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31778 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31779 numbering of elements for big-endian, we must reverse the order. */
31780 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
31782 /* The VTBL instruction does not use a modulo index, so we must take care
31783 of that ourselves. */
31784 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31785 mask
= gen_const_vec_duplicate (vmode
, mask
);
31786 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
31788 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
31791 /* Map lane ordering between architectural lane order, and GCC lane order,
31792 taking into account ABI. See comment above output_move_neon for details. */
31795 neon_endian_lane_map (machine_mode mode
, int lane
)
31797 if (BYTES_BIG_ENDIAN
)
31799 int nelems
= GET_MODE_NUNITS (mode
);
31800 /* Reverse lane order. */
31801 lane
= (nelems
- 1 - lane
);
31802 /* Reverse D register order, to match ABI. */
31803 if (GET_MODE_SIZE (mode
) == 16)
31804 lane
= lane
^ (nelems
/ 2);
31809 /* Some permutations index into pairs of vectors, this is a helper function
31810 to map indexes into those pairs of vectors. */
31813 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
31815 int nelem
= GET_MODE_NUNITS (mode
);
31816 if (BYTES_BIG_ENDIAN
)
31818 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
31822 /* Generate or test for an insn that supports a constant permutation. */
31824 /* Recognize patterns for the VUZP insns. */
31827 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
31829 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
31830 rtx out0
, out1
, in0
, in1
;
31834 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31837 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31838 big endian pattern on 64 bit vectors, so we correct for that. */
31839 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
31840 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
31842 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
31844 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
31846 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
31850 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31852 for (i
= 0; i
< nelt
; i
++)
31855 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
31856 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
31866 if (swap_nelt
!= 0)
31867 std::swap (in0
, in1
);
31870 out1
= gen_reg_rtx (d
->vmode
);
31872 std::swap (out0
, out1
);
31874 emit_insn (gen_neon_vuzp_internal (d
->vmode
, out0
, in0
, in1
, out1
));
31878 /* Recognize patterns for the VZIP insns. */
31881 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
31883 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
31884 rtx out0
, out1
, in0
, in1
;
31888 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31891 is_swapped
= BYTES_BIG_ENDIAN
;
31893 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
31896 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
31898 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
31902 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31904 for (i
= 0; i
< nelt
/ 2; i
++)
31907 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
31908 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
31912 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
31913 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
31925 std::swap (in0
, in1
);
31928 out1
= gen_reg_rtx (d
->vmode
);
31930 std::swap (out0
, out1
);
31932 emit_insn (gen_neon_vzip_internal (d
->vmode
, out0
, in0
, in1
, out1
));
31936 /* Recognize patterns for the VREV insns. */
31938 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
31940 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
31941 rtx (*gen
) (machine_mode
, rtx
, rtx
);
31943 if (!d
->one_vector_p
)
31954 gen
= gen_neon_vrev64
;
31965 gen
= gen_neon_vrev32
;
31971 gen
= gen_neon_vrev64
;
31982 gen
= gen_neon_vrev16
;
31986 gen
= gen_neon_vrev32
;
31992 gen
= gen_neon_vrev64
;
32002 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
32003 for (j
= 0; j
<= diff
; j
+= 1)
32005 /* This is guaranteed to be true as the value of diff
32006 is 7, 3, 1 and we should have enough elements in the
32007 queue to generate this. Getting a vector mask with a
32008 value of diff other than these values implies that
32009 something is wrong by the time we get here. */
32010 gcc_assert (i
+ j
< nelt
);
32011 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
32019 emit_insn (gen (d
->vmode
, d
->target
, d
->op0
));
32023 /* Recognize patterns for the VTRN insns. */
32026 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
32028 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
32029 rtx out0
, out1
, in0
, in1
;
32031 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
32034 /* Note that these are little-endian tests. Adjust for big-endian later. */
32035 if (d
->perm
[0] == 0)
32037 else if (d
->perm
[0] == 1)
32041 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
32043 for (i
= 0; i
< nelt
; i
+= 2)
32045 if (d
->perm
[i
] != i
+ odd
)
32047 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
32057 if (BYTES_BIG_ENDIAN
)
32059 std::swap (in0
, in1
);
32064 out1
= gen_reg_rtx (d
->vmode
);
32066 std::swap (out0
, out1
);
32068 emit_insn (gen_neon_vtrn_internal (d
->vmode
, out0
, in0
, in1
, out1
));
32072 /* Recognize patterns for the VEXT insns. */
32075 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
32077 unsigned int i
, nelt
= d
->perm
.length ();
32080 unsigned int location
;
32082 unsigned int next
= d
->perm
[0] + 1;
32084 /* TODO: Handle GCC's numbering of elements for big-endian. */
32085 if (BYTES_BIG_ENDIAN
)
32088 /* Check if the extracted indexes are increasing by one. */
32089 for (i
= 1; i
< nelt
; next
++, i
++)
32091 /* If we hit the most significant element of the 2nd vector in
32092 the previous iteration, no need to test further. */
32093 if (next
== 2 * nelt
)
32096 /* If we are operating on only one vector: it could be a
32097 rotation. If there are only two elements of size < 64, let
32098 arm_evpc_neon_vrev catch it. */
32099 if (d
->one_vector_p
&& (next
== nelt
))
32101 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
32107 if (d
->perm
[i
] != next
)
32111 location
= d
->perm
[0];
32117 offset
= GEN_INT (location
);
32119 if(d
->vmode
== E_DImode
)
32122 emit_insn (gen_neon_vext (d
->vmode
, d
->target
, d
->op0
, d
->op1
, offset
));
32126 /* The NEON VTBL instruction is a fully variable permuation that's even
32127 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32128 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32129 can do slightly better by expanding this as a constant where we don't
32130 have to apply a mask. */
32133 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
32135 rtx rperm
[MAX_VECT_LEN
], sel
;
32136 machine_mode vmode
= d
->vmode
;
32137 unsigned int i
, nelt
= d
->perm
.length ();
32139 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32140 numbering of elements for big-endian, we must reverse the order. */
32141 if (BYTES_BIG_ENDIAN
)
32147 /* Generic code will try constant permutation twice. Once with the
32148 original mode and again with the elements lowered to QImode.
32149 So wait and don't do the selector expansion ourselves. */
32150 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
32153 for (i
= 0; i
< nelt
; ++i
)
32154 rperm
[i
] = GEN_INT (d
->perm
[i
]);
32155 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
32156 sel
= force_reg (vmode
, sel
);
32158 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
32163 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
32165 /* Check if the input mask matches vext before reordering the
32168 if (arm_evpc_neon_vext (d
))
32171 /* The pattern matching functions above are written to look for a small
32172 number to begin the sequence (0, 1, N/2). If we begin with an index
32173 from the second operand, we can swap the operands. */
32174 unsigned int nelt
= d
->perm
.length ();
32175 if (d
->perm
[0] >= nelt
)
32177 d
->perm
.rotate_inputs (1);
32178 std::swap (d
->op0
, d
->op1
);
32183 if (arm_evpc_neon_vuzp (d
))
32185 if (arm_evpc_neon_vzip (d
))
32187 if (arm_evpc_neon_vrev (d
))
32189 if (arm_evpc_neon_vtrn (d
))
32191 return arm_evpc_neon_vtbl (d
);
32196 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32199 arm_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
32200 rtx target
, rtx op0
, rtx op1
,
32201 const vec_perm_indices
&sel
)
32203 if (vmode
!= op_mode
)
32206 struct expand_vec_perm_d d
;
32207 int i
, nelt
, which
;
32209 if (!VALID_NEON_DREG_MODE (vmode
) && !VALID_NEON_QREG_MODE (vmode
))
32215 rtx nop0
= force_reg (vmode
, op0
);
32221 op1
= force_reg (vmode
, op1
);
32226 gcc_assert (VECTOR_MODE_P (d
.vmode
));
32227 d
.testing_p
= !target
;
32229 nelt
= GET_MODE_NUNITS (d
.vmode
);
32230 for (i
= which
= 0; i
< nelt
; ++i
)
32232 int ei
= sel
[i
] & (2 * nelt
- 1);
32233 which
|= (ei
< nelt
? 1 : 2);
32242 d
.one_vector_p
= false;
32243 if (d
.testing_p
|| !rtx_equal_p (op0
, op1
))
32246 /* The elements of PERM do not suggest that only the first operand
32247 is used, but both operands are identical. Allow easier matching
32248 of the permutation by folding the permutation into the single
32253 d
.one_vector_p
= true;
32258 d
.one_vector_p
= true;
32262 d
.perm
.new_vector (sel
.encoding (), d
.one_vector_p
? 1 : 2, nelt
);
32265 return arm_expand_vec_perm_const_1 (&d
);
32267 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
32268 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
32269 if (!d
.one_vector_p
)
32270 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
32273 bool ret
= arm_expand_vec_perm_const_1 (&d
);
32280 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
32282 /* If we are soft float and we do not have ldrd
32283 then all auto increment forms are ok. */
32284 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
32289 /* Post increment and Pre Decrement are supported for all
32290 instruction forms except for vector forms. */
32293 if (VECTOR_MODE_P (mode
))
32295 if (code
!= ARM_PRE_DEC
)
32305 /* Without LDRD and mode size greater than
32306 word size, there is no point in auto-incrementing
32307 because ldm and stm will not have these forms. */
32308 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
32311 /* Vector and floating point modes do not support
32312 these auto increment forms. */
32313 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
32326 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32327 on ARM, since we know that shifts by negative amounts are no-ops.
32328 Additionally, the default expansion code is not available or suitable
32329 for post-reload insn splits (this can occur when the register allocator
32330 chooses not to do a shift in NEON).
32332 This function is used in both initial expand and post-reload splits, and
32333 handles all kinds of 64-bit shifts.
32335 Input requirements:
32336 - It is safe for the input and output to be the same register, but
32337 early-clobber rules apply for the shift amount and scratch registers.
32338 - Shift by register requires both scratch registers. In all other cases
32339 the scratch registers may be NULL.
32340 - Ashiftrt by a register also clobbers the CC register. */
32342 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
32343 rtx amount
, rtx scratch1
, rtx scratch2
)
32345 rtx out_high
= gen_highpart (SImode
, out
);
32346 rtx out_low
= gen_lowpart (SImode
, out
);
32347 rtx in_high
= gen_highpart (SImode
, in
);
32348 rtx in_low
= gen_lowpart (SImode
, in
);
32351 in = the register pair containing the input value.
32352 out = the destination register pair.
32353 up = the high- or low-part of each pair.
32354 down = the opposite part to "up".
32355 In a shift, we can consider bits to shift from "up"-stream to
32356 "down"-stream, so in a left-shift "up" is the low-part and "down"
32357 is the high-part of each register pair. */
32359 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
32360 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
32361 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
32362 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
32364 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
32366 && (REG_P (out
) || SUBREG_P (out
))
32367 && GET_MODE (out
) == DImode
);
32369 && (REG_P (in
) || SUBREG_P (in
))
32370 && GET_MODE (in
) == DImode
);
32372 && (((REG_P (amount
) || SUBREG_P (amount
))
32373 && GET_MODE (amount
) == SImode
)
32374 || CONST_INT_P (amount
)));
32375 gcc_assert (scratch1
== NULL
32376 || (GET_CODE (scratch1
) == SCRATCH
)
32377 || (GET_MODE (scratch1
) == SImode
32378 && REG_P (scratch1
)));
32379 gcc_assert (scratch2
== NULL
32380 || (GET_CODE (scratch2
) == SCRATCH
)
32381 || (GET_MODE (scratch2
) == SImode
32382 && REG_P (scratch2
)));
32383 gcc_assert (!REG_P (out
) || !REG_P (amount
)
32384 || !HARD_REGISTER_P (out
)
32385 || (REGNO (out
) != REGNO (amount
)
32386 && REGNO (out
) + 1 != REGNO (amount
)));
32388 /* Macros to make following code more readable. */
32389 #define SUB_32(DEST,SRC) \
32390 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32391 #define RSB_32(DEST,SRC) \
32392 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32393 #define SUB_S_32(DEST,SRC) \
32394 gen_addsi3_compare0 ((DEST), (SRC), \
32396 #define SET(DEST,SRC) \
32397 gen_rtx_SET ((DEST), (SRC))
32398 #define SHIFT(CODE,SRC,AMOUNT) \
32399 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32400 #define LSHIFT(CODE,SRC,AMOUNT) \
32401 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32402 SImode, (SRC), (AMOUNT))
32403 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32404 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32405 SImode, (SRC), (AMOUNT))
32407 gen_rtx_IOR (SImode, (A), (B))
32408 #define BRANCH(COND,LABEL) \
32409 gen_arm_cond_branch ((LABEL), \
32410 gen_rtx_ ## COND (CCmode, cc_reg, \
32414 /* Shifts by register and shifts by constant are handled separately. */
32415 if (CONST_INT_P (amount
))
32417 /* We have a shift-by-constant. */
32419 /* First, handle out-of-range shift amounts.
32420 In both cases we try to match the result an ARM instruction in a
32421 shift-by-register would give. This helps reduce execution
32422 differences between optimization levels, but it won't stop other
32423 parts of the compiler doing different things. This is "undefined
32424 behavior, in any case. */
32425 if (INTVAL (amount
) <= 0)
32426 emit_insn (gen_movdi (out
, in
));
32427 else if (INTVAL (amount
) >= 64)
32429 if (code
== ASHIFTRT
)
32431 rtx const31_rtx
= GEN_INT (31);
32432 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
32433 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
32436 emit_insn (gen_movdi (out
, const0_rtx
));
32439 /* Now handle valid shifts. */
32440 else if (INTVAL (amount
) < 32)
32442 /* Shifts by a constant less than 32. */
32443 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
32445 /* Clearing the out register in DImode first avoids lots
32446 of spilling and results in less stack usage.
32447 Later this redundant insn is completely removed.
32448 Do that only if "in" and "out" are different registers. */
32449 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32450 emit_insn (SET (out
, const0_rtx
));
32451 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32452 emit_insn (SET (out_down
,
32453 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
32455 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32459 /* Shifts by a constant greater than 31. */
32460 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
32462 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32463 emit_insn (SET (out
, const0_rtx
));
32464 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
32465 if (code
== ASHIFTRT
)
32466 emit_insn (gen_ashrsi3 (out_up
, in_up
,
32469 emit_insn (SET (out_up
, const0_rtx
));
32474 /* We have a shift-by-register. */
32475 rtx cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
32477 /* This alternative requires the scratch registers. */
32478 gcc_assert (scratch1
&& REG_P (scratch1
));
32479 gcc_assert (scratch2
&& REG_P (scratch2
));
32481 /* We will need the values "amount-32" and "32-amount" later.
32482 Swapping them around now allows the later code to be more general. */
32486 emit_insn (SUB_32 (scratch1
, amount
));
32487 emit_insn (RSB_32 (scratch2
, amount
));
32490 emit_insn (RSB_32 (scratch1
, amount
));
32491 /* Also set CC = amount > 32. */
32492 emit_insn (SUB_S_32 (scratch2
, amount
));
32495 emit_insn (RSB_32 (scratch1
, amount
));
32496 emit_insn (SUB_32 (scratch2
, amount
));
32499 gcc_unreachable ();
32502 /* Emit code like this:
32505 out_down = in_down << amount;
32506 out_down = (in_up << (amount - 32)) | out_down;
32507 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32508 out_up = in_up << amount;
32511 out_down = in_down >> amount;
32512 out_down = (in_up << (32 - amount)) | out_down;
32514 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32515 out_up = in_up << amount;
32518 out_down = in_down >> amount;
32519 out_down = (in_up << (32 - amount)) | out_down;
32521 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32522 out_up = in_up << amount;
32524 The ARM and Thumb2 variants are the same but implemented slightly
32525 differently. If this were only called during expand we could just
32526 use the Thumb2 case and let combine do the right thing, but this
32527 can also be called from post-reload splitters. */
32529 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32531 if (!TARGET_THUMB2
)
32533 /* Emit code for ARM mode. */
32534 emit_insn (SET (out_down
,
32535 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
32536 if (code
== ASHIFTRT
)
32538 rtx_code_label
*done_label
= gen_label_rtx ();
32539 emit_jump_insn (BRANCH (LT
, done_label
));
32540 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
32542 emit_label (done_label
);
32545 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
32550 /* Emit code for Thumb2 mode.
32551 Thumb2 can't do shift and or in one insn. */
32552 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
32553 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
32555 if (code
== ASHIFTRT
)
32557 rtx_code_label
*done_label
= gen_label_rtx ();
32558 emit_jump_insn (BRANCH (LT
, done_label
));
32559 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
32560 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
32561 emit_label (done_label
);
32565 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
32566 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
32570 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32584 /* Returns true if the pattern is a valid symbolic address, which is either a
32585 symbol_ref or (symbol_ref + addend).
32587 According to the ARM ELF ABI, the initial addend of REL-type relocations
32588 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32589 literal field of the instruction as a 16-bit signed value in the range
32590 -32768 <= A < 32768.
32592 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32593 unsigned range of 0 <= A < 256 as described in the AAELF32
32594 relocation handling documentation: REL-type relocations are encoded
32595 as unsigned in this case. */
32598 arm_valid_symbolic_address_p (rtx addr
)
32600 rtx xop0
, xop1
= NULL_RTX
;
32603 if (target_word_relocations
)
32606 if (SYMBOL_REF_P (tmp
) || LABEL_REF_P (tmp
))
32609 /* (const (plus: symbol_ref const_int)) */
32610 if (GET_CODE (addr
) == CONST
)
32611 tmp
= XEXP (addr
, 0);
32613 if (GET_CODE (tmp
) == PLUS
)
32615 xop0
= XEXP (tmp
, 0);
32616 xop1
= XEXP (tmp
, 1);
32618 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
32620 if (TARGET_THUMB1
&& !TARGET_HAVE_MOVT
)
32621 return IN_RANGE (INTVAL (xop1
), 0, 0xff);
32623 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
32630 /* Returns true if a valid comparison operation and makes
32631 the operands in a form that is valid. */
32633 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
32635 enum rtx_code code
= GET_CODE (*comparison
);
32637 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
32638 ? GET_MODE (*op2
) : GET_MODE (*op1
);
32640 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
32642 if (code
== UNEQ
|| code
== LTGT
)
32645 code_int
= (int)code
;
32646 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
32647 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
32652 if (!arm_add_operand (*op1
, mode
))
32653 *op1
= force_reg (mode
, *op1
);
32654 if (!arm_add_operand (*op2
, mode
))
32655 *op2
= force_reg (mode
, *op2
);
32659 /* gen_compare_reg() will sort out any invalid operands. */
32663 if (!TARGET_VFP_FP16INST
)
32665 /* FP16 comparisons are done in SF mode. */
32667 *op1
= convert_to_mode (mode
, *op1
, 1);
32668 *op2
= convert_to_mode (mode
, *op2
, 1);
32669 /* Fall through. */
32672 if (!vfp_compare_operand (*op1
, mode
))
32673 *op1
= force_reg (mode
, *op1
);
32674 if (!vfp_compare_operand (*op2
, mode
))
32675 *op2
= force_reg (mode
, *op2
);
32685 /* Maximum number of instructions to set block of memory. */
32687 arm_block_set_max_insns (void)
32689 if (optimize_function_for_size_p (cfun
))
32692 return current_tune
->max_insns_inline_memset
;
32695 /* Return TRUE if it's profitable to set block of memory for
32696 non-vectorized case. VAL is the value to set the memory
32697 with. LENGTH is the number of bytes to set. ALIGN is the
32698 alignment of the destination memory in bytes. UNALIGNED_P
32699 is TRUE if we can only set the memory with instructions
32700 meeting alignment requirements. USE_STRD_P is TRUE if we
32701 can use strd to set the memory. */
32703 arm_block_set_non_vect_profit_p (rtx val
,
32704 unsigned HOST_WIDE_INT length
,
32705 unsigned HOST_WIDE_INT align
,
32706 bool unaligned_p
, bool use_strd_p
)
32709 /* For leftovers in bytes of 0-7, we can set the memory block using
32710 strb/strh/str with minimum instruction number. */
32711 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32715 num
= arm_const_inline_cost (SET
, val
);
32716 num
+= length
/ align
+ length
% align
;
32718 else if (use_strd_p
)
32720 num
= arm_const_double_inline_cost (val
);
32721 num
+= (length
>> 3) + leftover
[length
& 7];
32725 num
= arm_const_inline_cost (SET
, val
);
32726 num
+= (length
>> 2) + leftover
[length
& 3];
32729 /* We may be able to combine last pair STRH/STRB into a single STR
32730 by shifting one byte back. */
32731 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
32734 return (num
<= arm_block_set_max_insns ());
32737 /* Return TRUE if it's profitable to set block of memory for
32738 vectorized case. LENGTH is the number of bytes to set.
32739 ALIGN is the alignment of destination memory in bytes.
32740 MODE is the vector mode used to set the memory. */
32742 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
32743 unsigned HOST_WIDE_INT align
,
32747 bool unaligned_p
= ((align
& 3) != 0);
32748 unsigned int nelt
= GET_MODE_NUNITS (mode
);
32750 /* Instruction loading constant value. */
32752 /* Instructions storing the memory. */
32753 num
+= (length
+ nelt
- 1) / nelt
;
32754 /* Instructions adjusting the address expression. Only need to
32755 adjust address expression if it's 4 bytes aligned and bytes
32756 leftover can only be stored by mis-aligned store instruction. */
32757 if (!unaligned_p
&& (length
& 3) != 0)
32760 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32761 if (!unaligned_p
&& mode
== V16QImode
)
32764 return (num
<= arm_block_set_max_insns ());
32767 /* Set a block of memory using vectorization instructions for the
32768 unaligned case. We fill the first LENGTH bytes of the memory
32769 area starting from DSTBASE with byte constant VALUE. ALIGN is
32770 the alignment requirement of memory. Return TRUE if succeeded. */
32772 arm_block_set_unaligned_vect (rtx dstbase
,
32773 unsigned HOST_WIDE_INT length
,
32774 unsigned HOST_WIDE_INT value
,
32775 unsigned HOST_WIDE_INT align
)
32777 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
32780 rtx (*gen_func
) (rtx
, rtx
);
32782 unsigned HOST_WIDE_INT v
= value
;
32783 unsigned int offset
= 0;
32784 gcc_assert ((align
& 0x3) != 0);
32785 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
32786 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
32787 if (length
>= nelt_v16
)
32790 gen_func
= gen_movmisalignv16qi
;
32795 gen_func
= gen_movmisalignv8qi
;
32797 nelt_mode
= GET_MODE_NUNITS (mode
);
32798 gcc_assert (length
>= nelt_mode
);
32799 /* Skip if it isn't profitable. */
32800 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
32803 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32804 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32806 v
= sext_hwi (v
, BITS_PER_WORD
);
32808 reg
= gen_reg_rtx (mode
);
32809 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
32810 /* Emit instruction loading the constant value. */
32811 emit_move_insn (reg
, val_vec
);
32813 /* Handle nelt_mode bytes in a vector. */
32814 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
32816 emit_insn ((*gen_func
) (mem
, reg
));
32817 if (i
+ 2 * nelt_mode
<= length
)
32819 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
32820 offset
+= nelt_mode
;
32821 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32825 /* If there are not less than nelt_v8 bytes leftover, we must be in
32827 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
32829 /* Handle (8, 16) bytes leftover. */
32830 if (i
+ nelt_v8
< length
)
32832 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
32833 offset
+= length
- i
;
32834 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32836 /* We are shifting bytes back, set the alignment accordingly. */
32837 if ((length
& 1) != 0 && align
>= 2)
32838 set_mem_align (mem
, BITS_PER_UNIT
);
32840 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32842 /* Handle (0, 8] bytes leftover. */
32843 else if (i
< length
&& i
+ nelt_v8
>= length
)
32845 if (mode
== V16QImode
)
32846 reg
= gen_lowpart (V8QImode
, reg
);
32848 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
32849 + (nelt_mode
- nelt_v8
))));
32850 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
32851 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
32853 /* We are shifting bytes back, set the alignment accordingly. */
32854 if ((length
& 1) != 0 && align
>= 2)
32855 set_mem_align (mem
, BITS_PER_UNIT
);
32857 emit_insn (gen_movmisalignv8qi (mem
, reg
));
32863 /* Set a block of memory using vectorization instructions for the
32864 aligned case. We fill the first LENGTH bytes of the memory area
32865 starting from DSTBASE with byte constant VALUE. ALIGN is the
32866 alignment requirement of memory. Return TRUE if succeeded. */
32868 arm_block_set_aligned_vect (rtx dstbase
,
32869 unsigned HOST_WIDE_INT length
,
32870 unsigned HOST_WIDE_INT value
,
32871 unsigned HOST_WIDE_INT align
)
32873 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
32874 rtx dst
, addr
, mem
;
32877 unsigned int offset
= 0;
32879 gcc_assert ((align
& 0x3) == 0);
32880 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
32881 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
32882 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
32887 nelt_mode
= GET_MODE_NUNITS (mode
);
32888 gcc_assert (length
>= nelt_mode
);
32889 /* Skip if it isn't profitable. */
32890 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
32893 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32895 reg
= gen_reg_rtx (mode
);
32896 val_vec
= gen_const_vec_duplicate (mode
, gen_int_mode (value
, QImode
));
32897 /* Emit instruction loading the constant value. */
32898 emit_move_insn (reg
, val_vec
);
32901 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32902 if (mode
== V16QImode
)
32904 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32905 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32907 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32908 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
32910 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
32911 offset
+= length
- nelt_mode
;
32912 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32913 /* We are shifting bytes back, set the alignment accordingly. */
32914 if ((length
& 0x3) == 0)
32915 set_mem_align (mem
, BITS_PER_UNIT
* 4);
32916 else if ((length
& 0x1) == 0)
32917 set_mem_align (mem
, BITS_PER_UNIT
* 2);
32919 set_mem_align (mem
, BITS_PER_UNIT
);
32921 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32924 /* Fall through for bytes leftover. */
32926 nelt_mode
= GET_MODE_NUNITS (mode
);
32927 reg
= gen_lowpart (V8QImode
, reg
);
32930 /* Handle 8 bytes in a vector. */
32931 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
32933 addr
= plus_constant (Pmode
, dst
, i
);
32934 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
32935 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
32936 emit_move_insn (mem
, reg
);
32938 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
32941 /* Handle single word leftover by shifting 4 bytes back. We can
32942 use aligned access for this case. */
32943 if (i
+ UNITS_PER_WORD
== length
)
32945 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
32946 offset
+= i
- UNITS_PER_WORD
;
32947 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
32948 /* We are shifting 4 bytes back, set the alignment accordingly. */
32949 if (align
> UNITS_PER_WORD
)
32950 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
32952 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
32954 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32955 We have to use unaligned access for this case. */
32956 else if (i
< length
)
32958 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
32959 offset
+= length
- nelt_mode
;
32960 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32961 /* We are shifting bytes back, set the alignment accordingly. */
32962 if ((length
& 1) == 0)
32963 set_mem_align (mem
, BITS_PER_UNIT
* 2);
32965 set_mem_align (mem
, BITS_PER_UNIT
);
32967 emit_insn (gen_movmisalignv8qi (mem
, reg
));
32973 /* Set a block of memory using plain strh/strb instructions, only
32974 using instructions allowed by ALIGN on processor. We fill the
32975 first LENGTH bytes of the memory area starting from DSTBASE
32976 with byte constant VALUE. ALIGN is the alignment requirement
32979 arm_block_set_unaligned_non_vect (rtx dstbase
,
32980 unsigned HOST_WIDE_INT length
,
32981 unsigned HOST_WIDE_INT value
,
32982 unsigned HOST_WIDE_INT align
)
32985 rtx dst
, addr
, mem
;
32986 rtx val_exp
, val_reg
, reg
;
32988 HOST_WIDE_INT v
= value
;
32990 gcc_assert (align
== 1 || align
== 2);
32993 v
|= (value
<< BITS_PER_UNIT
);
32995 v
= sext_hwi (v
, BITS_PER_WORD
);
32996 val_exp
= GEN_INT (v
);
32997 /* Skip if it isn't profitable. */
32998 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32999 align
, true, false))
33002 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
33003 mode
= (align
== 2 ? HImode
: QImode
);
33004 val_reg
= force_reg (SImode
, val_exp
);
33005 reg
= gen_lowpart (mode
, val_reg
);
33007 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
33009 addr
= plus_constant (Pmode
, dst
, i
);
33010 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
33011 emit_move_insn (mem
, reg
);
33014 /* Handle single byte leftover. */
33015 if (i
+ 1 == length
)
33017 reg
= gen_lowpart (QImode
, val_reg
);
33018 addr
= plus_constant (Pmode
, dst
, i
);
33019 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
33020 emit_move_insn (mem
, reg
);
33024 gcc_assert (i
== length
);
33028 /* Set a block of memory using plain strd/str/strh/strb instructions,
33029 to permit unaligned copies on processors which support unaligned
33030 semantics for those instructions. We fill the first LENGTH bytes
33031 of the memory area starting from DSTBASE with byte constant VALUE.
33032 ALIGN is the alignment requirement of memory. */
33034 arm_block_set_aligned_non_vect (rtx dstbase
,
33035 unsigned HOST_WIDE_INT length
,
33036 unsigned HOST_WIDE_INT value
,
33037 unsigned HOST_WIDE_INT align
)
33040 rtx dst
, addr
, mem
;
33041 rtx val_exp
, val_reg
, reg
;
33042 unsigned HOST_WIDE_INT v
;
33045 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
33046 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
33048 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
33049 if (length
< UNITS_PER_WORD
)
33050 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
33053 v
|= (v
<< BITS_PER_WORD
);
33055 v
= sext_hwi (v
, BITS_PER_WORD
);
33057 val_exp
= GEN_INT (v
);
33058 /* Skip if it isn't profitable. */
33059 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
33060 align
, false, use_strd_p
))
33065 /* Try without strd. */
33066 v
= (v
>> BITS_PER_WORD
);
33067 v
= sext_hwi (v
, BITS_PER_WORD
);
33068 val_exp
= GEN_INT (v
);
33069 use_strd_p
= false;
33070 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
33071 align
, false, use_strd_p
))
33076 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
33077 /* Handle double words using strd if possible. */
33080 val_reg
= force_reg (DImode
, val_exp
);
33082 for (; (i
+ 8 <= length
); i
+= 8)
33084 addr
= plus_constant (Pmode
, dst
, i
);
33085 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
33086 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
33087 emit_move_insn (mem
, reg
);
33089 emit_insn (gen_unaligned_storedi (mem
, reg
));
33093 val_reg
= force_reg (SImode
, val_exp
);
33095 /* Handle words. */
33096 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
33097 for (; (i
+ 4 <= length
); i
+= 4)
33099 addr
= plus_constant (Pmode
, dst
, i
);
33100 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
33101 if ((align
& 3) == 0)
33102 emit_move_insn (mem
, reg
);
33104 emit_insn (gen_unaligned_storesi (mem
, reg
));
33107 /* Merge last pair of STRH and STRB into a STR if possible. */
33108 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
33110 addr
= plus_constant (Pmode
, dst
, i
- 1);
33111 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
33112 /* We are shifting one byte back, set the alignment accordingly. */
33113 if ((align
& 1) == 0)
33114 set_mem_align (mem
, BITS_PER_UNIT
);
33116 /* Most likely this is an unaligned access, and we can't tell at
33117 compilation time. */
33118 emit_insn (gen_unaligned_storesi (mem
, reg
));
33122 /* Handle half word leftover. */
33123 if (i
+ 2 <= length
)
33125 reg
= gen_lowpart (HImode
, val_reg
);
33126 addr
= plus_constant (Pmode
, dst
, i
);
33127 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
33128 if ((align
& 1) == 0)
33129 emit_move_insn (mem
, reg
);
33131 emit_insn (gen_unaligned_storehi (mem
, reg
));
33136 /* Handle single byte leftover. */
33137 if (i
+ 1 == length
)
33139 reg
= gen_lowpart (QImode
, val_reg
);
33140 addr
= plus_constant (Pmode
, dst
, i
);
33141 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
33142 emit_move_insn (mem
, reg
);
33148 /* Set a block of memory using vectorization instructions for both
33149 aligned and unaligned cases. We fill the first LENGTH bytes of
33150 the memory area starting from DSTBASE with byte constant VALUE.
33151 ALIGN is the alignment requirement of memory. */
33153 arm_block_set_vect (rtx dstbase
,
33154 unsigned HOST_WIDE_INT length
,
33155 unsigned HOST_WIDE_INT value
,
33156 unsigned HOST_WIDE_INT align
)
33158 /* Check whether we need to use unaligned store instruction. */
33159 if (((align
& 3) != 0 || (length
& 3) != 0)
33160 /* Check whether unaligned store instruction is available. */
33161 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
33164 if ((align
& 3) == 0)
33165 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
33167 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
33170 /* Expand string store operation. Firstly we try to do that by using
33171 vectorization instructions, then try with ARM unaligned access and
33172 double-word store if profitable. OPERANDS[0] is the destination,
33173 OPERANDS[1] is the number of bytes, operands[2] is the value to
33174 initialize the memory, OPERANDS[3] is the known alignment of the
33177 arm_gen_setmem (rtx
*operands
)
33179 rtx dstbase
= operands
[0];
33180 unsigned HOST_WIDE_INT length
;
33181 unsigned HOST_WIDE_INT value
;
33182 unsigned HOST_WIDE_INT align
;
33184 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
33187 length
= UINTVAL (operands
[1]);
33191 value
= (UINTVAL (operands
[2]) & 0xFF);
33192 align
= UINTVAL (operands
[3]);
33193 if (TARGET_NEON
&& length
>= 8
33194 && current_tune
->string_ops_prefer_neon
33195 && arm_block_set_vect (dstbase
, length
, value
, align
))
33198 if (!unaligned_access
&& (align
& 3) != 0)
33199 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
33201 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
33206 arm_macro_fusion_p (void)
33208 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
33211 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33212 for MOVW / MOVT macro fusion. */
33215 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
33217 /* We are trying to fuse
33218 movw imm / movt imm
33219 instructions as a group that gets scheduled together. */
33221 rtx set_dest
= SET_DEST (curr_set
);
33223 if (GET_MODE (set_dest
) != SImode
)
33226 /* We are trying to match:
33227 prev (movw) == (set (reg r0) (const_int imm16))
33228 curr (movt) == (set (zero_extract (reg r0)
33231 (const_int imm16_1))
33233 prev (movw) == (set (reg r1)
33234 (high (symbol_ref ("SYM"))))
33235 curr (movt) == (set (reg r0)
33237 (symbol_ref ("SYM")))) */
33239 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
33241 if (CONST_INT_P (SET_SRC (curr_set
))
33242 && CONST_INT_P (SET_SRC (prev_set
))
33243 && REG_P (XEXP (set_dest
, 0))
33244 && REG_P (SET_DEST (prev_set
))
33245 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
33249 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
33250 && REG_P (SET_DEST (curr_set
))
33251 && REG_P (SET_DEST (prev_set
))
33252 && GET_CODE (SET_SRC (prev_set
)) == HIGH
33253 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
33260 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
33262 rtx prev_set
= single_set (prev
);
33263 rtx curr_set
= single_set (curr
);
33269 if (any_condjump_p (curr
))
33272 if (!arm_macro_fusion_p ())
33275 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
33276 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
33282 /* Return true iff the instruction fusion described by OP is enabled. */
33284 arm_fusion_enabled_p (tune_params::fuse_ops op
)
33286 return current_tune
->fusible_ops
& op
;
33289 /* Return TRUE if return address signing mechanism is enabled. */
33291 arm_current_function_pac_enabled_p (void)
33293 return (aarch_ra_sign_scope
== AARCH_FUNCTION_ALL
33294 || (aarch_ra_sign_scope
== AARCH_FUNCTION_NON_LEAF
33295 && !crtl
->is_leaf
));
33298 /* Raise an error if the current target arch is not bti compatible. */
33299 void aarch_bti_arch_check (void)
33301 if (!arm_arch8m_main
)
33302 error ("This architecture does not support branch protection instructions");
33305 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33307 aarch_bti_enabled (void)
33309 return aarch_enable_bti
!= 0;
33312 /* Check if INSN is a BTI J insn. */
33314 aarch_bti_j_insn_p (rtx_insn
*insn
)
33316 if (!insn
|| !INSN_P (insn
))
33319 rtx pat
= PATTERN (insn
);
33320 return GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == VUNSPEC_BTI_NOP
;
33323 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33325 aarch_pac_insn_p (rtx x
)
33327 if (!x
|| !INSN_P (x
))
33330 rtx pat
= PATTERN (x
);
33332 if (GET_CODE (pat
) == SET
)
33334 rtx tmp
= XEXP (pat
, 1);
33336 && ((GET_CODE (tmp
) == UNSPEC
33337 && XINT (tmp
, 1) == UNSPEC_PAC_NOP
)
33338 || (GET_CODE (tmp
) == UNSPEC_VOLATILE
33339 && XINT (tmp
, 1) == VUNSPEC_PACBTI_NOP
)))
33346 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33347 For Arm, both of these map to a simple BTI instruction. */
33350 aarch_gen_bti_c (void)
33352 return gen_bti_nop ();
33356 aarch_gen_bti_j (void)
33358 return gen_bti_nop ();
33361 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33362 scheduled for speculative execution. Reject the long-running division
33363 and square-root instructions. */
33366 arm_sched_can_speculate_insn (rtx_insn
*insn
)
33368 switch (get_attr_type (insn
))
33376 case TYPE_NEON_FP_SQRT_S
:
33377 case TYPE_NEON_FP_SQRT_D
:
33378 case TYPE_NEON_FP_SQRT_S_Q
:
33379 case TYPE_NEON_FP_SQRT_D_Q
:
33380 case TYPE_NEON_FP_DIV_S
:
33381 case TYPE_NEON_FP_DIV_D
:
33382 case TYPE_NEON_FP_DIV_S_Q
:
33383 case TYPE_NEON_FP_DIV_D_Q
:
33390 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33392 static unsigned HOST_WIDE_INT
33393 arm_asan_shadow_offset (void)
33395 return HOST_WIDE_INT_1U
<< 29;
33399 /* This is a temporary fix for PR60655. Ideally we need
33400 to handle most of these cases in the generic part but
33401 currently we reject minus (..) (sym_ref). We try to
33402 ameliorate the case with minus (sym_ref1) (sym_ref2)
33403 where they are in the same section. */
33406 arm_const_not_ok_for_debug_p (rtx p
)
33408 tree decl_op0
= NULL
;
33409 tree decl_op1
= NULL
;
33411 if (GET_CODE (p
) == UNSPEC
)
33413 if (GET_CODE (p
) == MINUS
)
33415 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
33417 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
33419 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
33420 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
33422 if ((VAR_P (decl_op1
)
33423 || TREE_CODE (decl_op1
) == CONST_DECL
)
33424 && (VAR_P (decl_op0
)
33425 || TREE_CODE (decl_op0
) == CONST_DECL
))
33426 return (get_variable_section (decl_op1
, false)
33427 != get_variable_section (decl_op0
, false));
33429 if (TREE_CODE (decl_op1
) == LABEL_DECL
33430 && TREE_CODE (decl_op0
) == LABEL_DECL
)
33431 return (DECL_CONTEXT (decl_op1
)
33432 != DECL_CONTEXT (decl_op0
));
33442 /* return TRUE if x is a reference to a value in a constant pool */
33444 arm_is_constant_pool_ref (rtx x
)
33447 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
33448 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
33451 /* Remember the last target of arm_set_current_function. */
33452 static GTY(()) tree arm_previous_fndecl
;
33454 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33457 save_restore_target_globals (tree new_tree
)
33459 /* If we have a previous state, use it. */
33460 if (TREE_TARGET_GLOBALS (new_tree
))
33461 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
33462 else if (new_tree
== target_option_default_node
)
33463 restore_target_globals (&default_target_globals
);
33466 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33467 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
33470 arm_option_params_internal ();
33473 /* Invalidate arm_previous_fndecl. */
33476 arm_reset_previous_fndecl (void)
33478 arm_previous_fndecl
= NULL_TREE
;
33481 /* Establish appropriate back-end context for processing the function
33482 FNDECL. The argument might be NULL to indicate processing at top
33483 level, outside of any function scope. */
33486 arm_set_current_function (tree fndecl
)
33488 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
33491 tree old_tree
= (arm_previous_fndecl
33492 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
33495 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
33497 /* If current function has no attributes but previous one did,
33498 use the default node. */
33499 if (! new_tree
&& old_tree
)
33500 new_tree
= target_option_default_node
;
33502 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33503 the default have been handled by save_restore_target_globals from
33504 arm_pragma_target_parse. */
33505 if (old_tree
== new_tree
)
33508 arm_previous_fndecl
= fndecl
;
33510 /* First set the target options. */
33511 cl_target_option_restore (&global_options
, &global_options_set
,
33512 TREE_TARGET_OPTION (new_tree
));
33514 save_restore_target_globals (new_tree
);
33516 arm_override_options_after_change_1 (&global_options
, &global_options_set
);
33519 /* Implement TARGET_OPTION_PRINT. */
33522 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
33524 int flags
= ptr
->x_target_flags
;
33525 const char *fpu_name
;
33527 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
33528 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
33530 fprintf (file
, "%*sselected isa %s\n", indent
, "",
33531 TARGET_THUMB2_P (flags
) ? "thumb2" :
33532 TARGET_THUMB_P (flags
) ? "thumb1" :
33535 if (ptr
->x_arm_arch_string
)
33536 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
33537 ptr
->x_arm_arch_string
);
33539 if (ptr
->x_arm_cpu_string
)
33540 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
33541 ptr
->x_arm_cpu_string
);
33543 if (ptr
->x_arm_tune_string
)
33544 fprintf (file
, "%*sselected tune %s\n", indent
, "",
33545 ptr
->x_arm_tune_string
);
33547 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
33550 /* Hook to determine if one function can safely inline another. */
33553 arm_can_inline_p (tree caller
, tree callee
)
33555 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
33556 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
33557 bool can_inline
= true;
33559 struct cl_target_option
*caller_opts
33560 = TREE_TARGET_OPTION (caller_tree
? caller_tree
33561 : target_option_default_node
);
33563 struct cl_target_option
*callee_opts
33564 = TREE_TARGET_OPTION (callee_tree
? callee_tree
33565 : target_option_default_node
);
33567 if (callee_opts
== caller_opts
)
33570 /* Callee's ISA features should be a subset of the caller's. */
33571 struct arm_build_target caller_target
;
33572 struct arm_build_target callee_target
;
33573 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
33574 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
33576 arm_configure_build_target (&caller_target
, caller_opts
, false);
33577 arm_configure_build_target (&callee_target
, callee_opts
, false);
33578 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
33579 can_inline
= false;
33581 sbitmap_free (caller_target
.isa
);
33582 sbitmap_free (callee_target
.isa
);
33584 /* OK to inline between different modes.
33585 Function with mode specific instructions, e.g using asm,
33586 must be explicitly protected with noinline. */
33590 /* Hook to fix function's alignment affected by target attribute. */
33593 arm_relayout_function (tree fndecl
)
33595 if (DECL_USER_ALIGN (fndecl
))
33598 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
33601 callee_tree
= target_option_default_node
;
33603 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
33606 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
33609 /* Inner function to process the attribute((target(...))), take an argument and
33610 set the current options from the argument. If we have a list, recursively
33611 go over the list. */
33614 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
33616 if (TREE_CODE (args
) == TREE_LIST
)
33620 for (; args
; args
= TREE_CHAIN (args
))
33621 if (TREE_VALUE (args
)
33622 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
33627 else if (TREE_CODE (args
) != STRING_CST
)
33629 error ("attribute %<target%> argument not a string");
33633 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
33636 while ((q
= strtok (argstr
, ",")) != NULL
)
33639 if (!strcmp (q
, "thumb"))
33641 opts
->x_target_flags
|= MASK_THUMB
;
33642 if (TARGET_FDPIC
&& !arm_arch_thumb2
)
33643 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33646 else if (!strcmp (q
, "arm"))
33647 opts
->x_target_flags
&= ~MASK_THUMB
;
33649 else if (!strcmp (q
, "general-regs-only"))
33650 opts
->x_target_flags
|= MASK_GENERAL_REGS_ONLY
;
33652 else if (startswith (q
, "fpu="))
33655 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+ 4,
33656 &fpu_index
, CL_TARGET
))
33658 error ("invalid fpu for target attribute or pragma %qs", q
);
33661 if (fpu_index
== TARGET_FPU_auto
)
33663 /* This doesn't really make sense until we support
33664 general dynamic selection of the architecture and all
33666 sorry ("auto fpu selection not currently permitted here");
33669 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
33671 else if (startswith (q
, "arch="))
33673 char *arch
= q
+ 5;
33674 const arch_option
*arm_selected_arch
33675 = arm_parse_arch_option_name (all_architectures
, "arch", arch
);
33677 if (!arm_selected_arch
)
33679 error ("invalid architecture for target attribute or pragma %qs",
33684 opts
->x_arm_arch_string
= xstrndup (arch
, strlen (arch
));
33686 else if (q
[0] == '+')
33688 opts
->x_arm_arch_string
33689 = xasprintf ("%s%s", opts
->x_arm_arch_string
, q
);
33693 error ("unknown target attribute or pragma %qs", q
);
33701 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33704 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
33705 struct gcc_options
*opts_set
)
33707 struct cl_target_option cl_opts
;
33709 if (!arm_valid_target_attribute_rec (args
, opts
))
33712 cl_target_option_save (&cl_opts
, opts
, opts_set
);
33713 arm_configure_build_target (&arm_active_target
, &cl_opts
, false);
33714 arm_option_check_internal (opts
);
33715 /* Do any overrides, such as global options arch=xxx.
33716 We do this since arm_active_target was overridden. */
33717 arm_option_reconfigure_globals ();
33718 arm_options_perform_arch_sanity_checks ();
33719 arm_option_override_internal (opts
, opts_set
);
33721 return build_target_option_node (opts
, opts_set
);
33725 add_attribute (const char * mode
, tree
*attributes
)
33727 size_t len
= strlen (mode
);
33728 tree value
= build_string (len
, mode
);
33730 TREE_TYPE (value
) = build_array_type (char_type_node
,
33731 build_index_type (size_int (len
)));
33733 *attributes
= tree_cons (get_identifier ("target"),
33734 build_tree_list (NULL_TREE
, value
),
33738 /* For testing. Insert thumb or arm modes alternatively on functions. */
33741 arm_insert_attributes (tree fndecl
, tree
* attributes
)
33745 if (! TARGET_FLIP_THUMB
)
33748 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
33749 || fndecl_built_in_p (fndecl
) || DECL_ARTIFICIAL (fndecl
))
33752 /* Nested definitions must inherit mode. */
33753 if (current_function_decl
)
33755 mode
= TARGET_THUMB
? "thumb" : "arm";
33756 add_attribute (mode
, attributes
);
33760 /* If there is already a setting don't change it. */
33761 if (lookup_attribute ("target", *attributes
) != NULL
)
33764 mode
= thumb_flipper
? "thumb" : "arm";
33765 add_attribute (mode
, attributes
);
33767 thumb_flipper
= !thumb_flipper
;
33770 /* Hook to validate attribute((target("string"))). */
33773 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
33774 tree args
, int ARG_UNUSED (flags
))
33777 struct gcc_options func_options
, func_options_set
;
33778 tree cur_tree
, new_optimize
;
33779 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
33781 /* Get the optimization options of the current function. */
33782 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
33784 /* If the function changed the optimization levels as well as setting target
33785 options, start with the optimizations specified. */
33786 if (!func_optimize
)
33787 func_optimize
= optimization_default_node
;
33789 /* Init func_options. */
33790 memset (&func_options
, 0, sizeof (func_options
));
33791 init_options_struct (&func_options
, NULL
);
33792 lang_hooks
.init_options_struct (&func_options
);
33793 memset (&func_options_set
, 0, sizeof (func_options_set
));
33795 /* Initialize func_options to the defaults. */
33796 cl_optimization_restore (&func_options
, &func_options_set
,
33797 TREE_OPTIMIZATION (func_optimize
));
33799 cl_target_option_restore (&func_options
, &func_options_set
,
33800 TREE_TARGET_OPTION (target_option_default_node
));
33802 /* Set func_options flags with new target mode. */
33803 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
33804 &func_options_set
);
33806 if (cur_tree
== NULL_TREE
)
33809 new_optimize
= build_optimization_node (&func_options
, &func_options_set
);
33811 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
33813 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
33818 /* Match an ISA feature bitmap to a named FPU. We always use the
33819 first entry that exactly matches the feature set, so that we
33820 effectively canonicalize the FPU name for the assembler. */
33822 arm_identify_fpu_from_isa (sbitmap isa
)
33824 auto_sbitmap
fpubits (isa_num_bits
);
33825 auto_sbitmap
cand_fpubits (isa_num_bits
);
33827 bitmap_and (fpubits
, isa
, isa_all_fpubits_internal
);
33829 /* If there are no ISA feature bits relating to the FPU, we must be
33830 doing soft-float. */
33831 if (bitmap_empty_p (fpubits
))
33834 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
33836 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
33837 if (bitmap_equal_p (fpubits
, cand_fpubits
))
33838 return all_fpus
[i
].name
;
33840 /* We must find an entry, or things have gone wrong. */
33841 gcc_unreachable ();
33844 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33845 by the function fndecl. */
33847 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
33849 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
33851 struct cl_target_option
*targ_options
;
33853 targ_options
= TREE_TARGET_OPTION (target_parts
);
33855 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
33856 gcc_assert (targ_options
);
33858 arm_print_asm_arch_directives (stream
, targ_options
);
33860 fprintf (stream
, "\t.syntax unified\n");
33864 if (is_called_in_ARM_mode (decl
)
33865 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
33866 && cfun
->is_thunk
))
33867 fprintf (stream
, "\t.code 32\n");
33868 else if (TARGET_THUMB1
)
33869 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
33871 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
33874 fprintf (stream
, "\t.arm\n");
33876 if (TARGET_POKE_FUNCTION_NAME
)
33877 arm_poke_function_name (stream
, (const char *) name
);
33880 /* If MEM is in the form of [base+offset], extract the two parts
33881 of address and set to BASE and OFFSET, otherwise return false
33882 after clearing BASE and OFFSET. */
33885 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
33889 gcc_assert (MEM_P (mem
));
33891 addr
= XEXP (mem
, 0);
33893 /* Strip off const from addresses like (const (addr)). */
33894 if (GET_CODE (addr
) == CONST
)
33895 addr
= XEXP (addr
, 0);
33900 *offset
= const0_rtx
;
33904 if (GET_CODE (addr
) == PLUS
33905 && GET_CODE (XEXP (addr
, 0)) == REG
33906 && CONST_INT_P (XEXP (addr
, 1)))
33908 *base
= XEXP (addr
, 0);
33909 *offset
= XEXP (addr
, 1);
33914 *offset
= NULL_RTX
;
33919 /* If INSN is a load or store of address in the form of [base+offset],
33920 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
33921 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33922 otherwise return FALSE. */
33925 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
33929 gcc_assert (INSN_P (insn
));
33930 x
= PATTERN (insn
);
33931 if (GET_CODE (x
) != SET
)
33935 dest
= SET_DEST (x
);
33936 if (REG_P (src
) && MEM_P (dest
))
33939 extract_base_offset_in_addr (dest
, base
, offset
);
33941 else if (MEM_P (src
) && REG_P (dest
))
33944 extract_base_offset_in_addr (src
, base
, offset
);
33949 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
33952 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33954 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
33955 and PRI are only calculated for these instructions. For other instruction,
33956 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
33957 instruction fusion can be supported by returning different priorities.
33959 It's important that irrelevant instructions get the largest FUSION_PRI. */
33962 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
33963 int *fusion_pri
, int *pri
)
33969 gcc_assert (INSN_P (insn
));
33972 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
33979 /* Load goes first. */
33981 *fusion_pri
= tmp
- 1;
33983 *fusion_pri
= tmp
- 2;
33987 /* INSN with smaller base register goes first. */
33988 tmp
-= ((REGNO (base
) & 0xff) << 20);
33990 /* INSN with smaller offset goes first. */
33991 off_val
= (int)(INTVAL (offset
));
33993 tmp
-= (off_val
& 0xfffff);
33995 tmp
+= ((- off_val
) & 0xfffff);
34002 /* Construct and return a PARALLEL RTX vector with elements numbering the
34003 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34004 the vector - from the perspective of the architecture. This does not
34005 line up with GCC's perspective on lane numbers, so we end up with
34006 different masks depending on our target endian-ness. The diagram
34007 below may help. We must draw the distinction when building masks
34008 which select one half of the vector. An instruction selecting
34009 architectural low-lanes for a big-endian target, must be described using
34010 a mask selecting GCC high-lanes.
34012 Big-Endian Little-Endian
34014 GCC 0 1 2 3 3 2 1 0
34015 | x | x | x | x | | x | x | x | x |
34016 Architecture 3 2 1 0 3 2 1 0
34018 Low Mask: { 2, 3 } { 0, 1 }
34019 High Mask: { 0, 1 } { 2, 3 }
34023 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
34025 int nunits
= GET_MODE_NUNITS (mode
);
34026 rtvec v
= rtvec_alloc (nunits
/ 2);
34027 int high_base
= nunits
/ 2;
34033 if (BYTES_BIG_ENDIAN
)
34034 base
= high
? low_base
: high_base
;
34036 base
= high
? high_base
: low_base
;
34038 for (i
= 0; i
< nunits
/ 2; i
++)
34039 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
34041 t1
= gen_rtx_PARALLEL (mode
, v
);
34045 /* Check OP for validity as a PARALLEL RTX vector with elements
34046 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34047 from the perspective of the architecture. See the diagram above
34048 arm_simd_vect_par_cnst_half_p for more details. */
34051 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
34054 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
34055 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
34056 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
34059 if (!VECTOR_MODE_P (mode
))
34062 if (count_op
!= count_ideal
)
34065 for (i
= 0; i
< count_ideal
; i
++)
34067 rtx elt_op
= XVECEXP (op
, 0, i
);
34068 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
34070 if (!CONST_INT_P (elt_op
)
34071 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
34077 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34080 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
34083 /* For now, we punt and not handle this for TARGET_THUMB1. */
34084 if (vcall_offset
&& TARGET_THUMB1
)
34087 /* Otherwise ok. */
34091 /* Generate RTL for a conditional branch with rtx comparison CODE in
34092 mode CC_MODE. The destination of the unlikely conditional branch
34096 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
34100 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
34101 gen_rtx_REG (cc_mode
, CC_REGNUM
),
34104 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
34105 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
34107 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
34110 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34112 For pure-code sections there is no letter code for this attribute, so
34113 output all the section flags numerically when this is needed. */
34116 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
34119 if (flags
& SECTION_ARM_PURECODE
)
34123 if (!(flags
& SECTION_DEBUG
))
34125 if (flags
& SECTION_EXCLUDE
)
34126 *num
|= 0x80000000;
34127 if (flags
& SECTION_WRITE
)
34129 if (flags
& SECTION_CODE
)
34131 if (flags
& SECTION_MERGE
)
34133 if (flags
& SECTION_STRINGS
)
34135 if (flags
& SECTION_TLS
)
34137 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
34146 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34148 If pure-code is passed as an option, make sure all functions are in
34149 sections that have the SHF_ARM_PURECODE attribute. */
34152 arm_function_section (tree decl
, enum node_frequency freq
,
34153 bool startup
, bool exit
)
34155 const char * section_name
;
34158 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
34159 return default_function_section (decl
, freq
, startup
, exit
);
34161 if (!target_pure_code
)
34162 return default_function_section (decl
, freq
, startup
, exit
);
34165 section_name
= DECL_SECTION_NAME (decl
);
34167 /* If a function is not in a named section then it falls under the 'default'
34168 text section, also known as '.text'. We can preserve previous behavior as
34169 the default text section already has the SHF_ARM_PURECODE section
34173 section
*default_sec
= default_function_section (decl
, freq
, startup
,
34176 /* If default_sec is not null, then it must be a special section like for
34177 example .text.startup. We set the pure-code attribute and return the
34178 same section to preserve existing behavior. */
34180 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
34181 return default_sec
;
34184 /* Otherwise look whether a section has already been created with
34186 sec
= get_named_section (decl
, section_name
, 0);
34188 /* If that is not the case passing NULL as the section's name to
34189 'get_named_section' will create a section with the declaration's
34191 sec
= get_named_section (decl
, NULL
, 0);
34193 /* Set the SHF_ARM_PURECODE attribute. */
34194 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
34199 /* Implements the TARGET_SECTION_FLAGS hook.
34201 If DECL is a function declaration and pure-code is passed as an option
34202 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
34203 section's name and RELOC indicates whether the declarations initializer may
34204 contain runtime relocations. */
34206 static unsigned int
34207 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
34209 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
34211 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
34212 flags
|= SECTION_ARM_PURECODE
;
34217 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34220 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
34222 rtx
*quot_p
, rtx
*rem_p
)
34224 if (mode
== SImode
)
34225 gcc_assert (!TARGET_IDIV
);
34227 scalar_int_mode libval_mode
34228 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
));
34230 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
34231 libval_mode
, op0
, mode
, op1
, mode
);
34233 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
34234 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
34235 GET_MODE_SIZE (mode
));
34237 gcc_assert (quotient
);
34238 gcc_assert (remainder
);
34240 *quot_p
= quotient
;
34241 *rem_p
= remainder
;
34244 /* This function checks for the availability of the coprocessor builtin passed
34245 in BUILTIN for the current target. Returns true if it is available and
34246 false otherwise. If a BUILTIN is passed for which this function has not
34247 been implemented it will cause an exception. */
34250 arm_coproc_builtin_available (enum unspecv builtin
)
34252 /* None of these builtins are available in Thumb mode if the target only
34253 supports Thumb-1. */
34271 case VUNSPEC_LDC2L
:
34273 case VUNSPEC_STC2L
:
34276 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34283 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34285 if (arm_arch6
|| arm_arch5te
)
34288 case VUNSPEC_MCRR2
:
34289 case VUNSPEC_MRRC2
:
34294 gcc_unreachable ();
34299 /* This function returns true if OP is a valid memory operand for the ldc and
34300 stc coprocessor instructions and false otherwise. */
34303 arm_coproc_ldc_stc_legitimate_address (rtx op
)
34305 HOST_WIDE_INT range
;
34306 /* Has to be a memory operand. */
34312 /* We accept registers. */
34316 switch GET_CODE (op
)
34320 /* Or registers with an offset. */
34321 if (!REG_P (XEXP (op
, 0)))
34326 /* The offset must be an immediate though. */
34327 if (!CONST_INT_P (op
))
34330 range
= INTVAL (op
);
34332 /* Within the range of [-1020,1020]. */
34333 if (!IN_RANGE (range
, -1020, 1020))
34336 /* And a multiple of 4. */
34337 return (range
% 4) == 0;
34343 return REG_P (XEXP (op
, 0));
34345 gcc_unreachable ();
34350 /* Return the diagnostic message string if conversion from FROMTYPE to
34351 TOTYPE is not allowed, NULL otherwise. */
34353 static const char *
34354 arm_invalid_conversion (const_tree fromtype
, const_tree totype
)
34356 if (element_mode (fromtype
) != element_mode (totype
))
34358 /* Do no allow conversions to/from BFmode scalar types. */
34359 if (TYPE_MODE (fromtype
) == BFmode
)
34360 return N_("invalid conversion from type %<bfloat16_t%>");
34361 if (TYPE_MODE (totype
) == BFmode
)
34362 return N_("invalid conversion to type %<bfloat16_t%>");
34365 /* Conversion allowed. */
34369 /* Return the diagnostic message string if the unary operation OP is
34370 not permitted on TYPE, NULL otherwise. */
34372 static const char *
34373 arm_invalid_unary_op (int op
, const_tree type
)
34375 /* Reject all single-operand operations on BFmode except for &. */
34376 if (element_mode (type
) == BFmode
&& op
!= ADDR_EXPR
)
34377 return N_("operation not permitted on type %<bfloat16_t%>");
34379 /* Operation allowed. */
34383 /* Return the diagnostic message string if the binary operation OP is
34384 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34386 static const char *
34387 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
,
34390 /* Reject all 2-operand operations on BFmode. */
34391 if (element_mode (type1
) == BFmode
34392 || element_mode (type2
) == BFmode
)
34393 return N_("operation not permitted on type %<bfloat16_t%>");
34395 /* Operation allowed. */
34399 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34401 In VFPv1, VFP registers could only be accessed in the mode they were
34402 set, so subregs would be invalid there. However, we don't support
34403 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34405 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34406 VFP registers in little-endian order. We can't describe that accurately to
34407 GCC, so avoid taking subregs of such values.
34409 The only exception is going from a 128-bit to a 64-bit type. In that
34410 case the data layout happens to be consistent for big-endian, so we
34411 explicitly allow that case. */
34414 arm_can_change_mode_class (machine_mode from
, machine_mode to
,
34415 reg_class_t rclass
)
34418 && !(GET_MODE_SIZE (from
) == 16 && GET_MODE_SIZE (to
) == 8)
34419 && (GET_MODE_SIZE (from
) > UNITS_PER_WORD
34420 || GET_MODE_SIZE (to
) > UNITS_PER_WORD
)
34421 && reg_classes_intersect_p (VFP_REGS
, rclass
))
34426 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34427 strcpy from constants will be faster. */
34429 static HOST_WIDE_INT
34430 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
34432 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
34433 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
34434 return MAX (align
, BITS_PER_WORD
* factor
);
34438 /* Emit a speculation barrier on target architectures that do not have
34439 DSB/ISB directly. Such systems probably don't need a barrier
34440 themselves, but if the code is ever run on a later architecture, it
34441 might become a problem. */
34443 arm_emit_speculation_barrier_function ()
34445 emit_library_call (speculation_barrier_libfunc
, LCT_NORMAL
, VOIDmode
);
34448 /* Have we recorded an explicit access to the Q bit of APSR?. */
34450 arm_q_bit_access (void)
34452 if (cfun
&& cfun
->decl
)
34453 return lookup_attribute ("acle qbit",
34454 DECL_ATTRIBUTES (cfun
->decl
));
34458 /* Have we recorded an explicit access to the GE bits of PSTATE?. */
34460 arm_ge_bits_access (void)
34462 if (cfun
&& cfun
->decl
)
34463 return lookup_attribute ("acle gebits",
34464 DECL_ATTRIBUTES (cfun
->decl
));
34468 /* NULL if insn INSN is valid within a low-overhead loop.
34469 Otherwise return why doloop cannot be applied. */
34471 static const char *
34472 arm_invalid_within_doloop (const rtx_insn
*insn
)
34474 if (!TARGET_HAVE_LOB
)
34475 return default_invalid_within_doloop (insn
);
34478 return "Function call in the loop.";
34480 if (reg_mentioned_p (gen_rtx_REG (SImode
, LR_REGNUM
), insn
))
34481 return "LR is used inside loop.";
34487 arm_target_insn_ok_for_lob (rtx insn
)
34489 basic_block bb
= BLOCK_FOR_INSN (insn
);
34490 /* Make sure the basic block of the target insn is a simple latch
34491 having as single predecessor and successor the body of the loop
34492 itself. Only simple loops with a single basic block as body are
34493 supported for 'low over head loop' making sure that LE target is
34494 above LE itself in the generated code. */
34496 return single_succ_p (bb
)
34497 && single_pred_p (bb
)
34498 && single_succ_edge (bb
)->dest
== single_pred_edge (bb
)->src
34499 && contains_no_active_insn_p (bb
);
34503 namespace selftest
{
34505 /* Scan the static data tables generated by parsecpu.awk looking for
34506 potential issues with the data. We primarily check for
34507 inconsistencies in the option extensions at present (extensions
34508 that duplicate others but aren't marked as aliases). Furthermore,
34509 for correct canonicalization later options must never be a subset
34510 of an earlier option. Any extension should also only specify other
34511 feature bits and never an architecture bit. The architecture is inferred
34512 from the declaration of the extension. */
34514 arm_test_cpu_arch_data (void)
34516 const arch_option
*arch
;
34517 const cpu_option
*cpu
;
34518 auto_sbitmap
target_isa (isa_num_bits
);
34519 auto_sbitmap
isa1 (isa_num_bits
);
34520 auto_sbitmap
isa2 (isa_num_bits
);
34522 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
34524 const cpu_arch_extension
*ext1
, *ext2
;
34526 if (arch
->common
.extensions
== NULL
)
34529 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
34531 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
34536 arm_initialize_isa (isa1
, ext1
->isa_bits
);
34537 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
34539 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
34542 arm_initialize_isa (isa2
, ext2
->isa_bits
);
34543 /* If the option is a subset of the parent option, it doesn't
34544 add anything and so isn't useful. */
34545 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
34547 /* If the extension specifies any architectural bits then
34548 disallow it. Extensions should only specify feature bits. */
34549 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
34554 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
34556 const cpu_arch_extension
*ext1
, *ext2
;
34558 if (cpu
->common
.extensions
== NULL
)
34561 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
34563 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
34568 arm_initialize_isa (isa1
, ext1
->isa_bits
);
34569 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
34571 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
34574 arm_initialize_isa (isa2
, ext2
->isa_bits
);
34575 /* If the option is a subset of the parent option, it doesn't
34576 add anything and so isn't useful. */
34577 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
34579 /* If the extension specifies any architectural bits then
34580 disallow it. Extensions should only specify feature bits. */
34581 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
34587 /* Scan the static data tables generated by parsecpu.awk looking for
34588 potential issues with the data. Here we check for consistency between the
34589 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34590 a feature bit that is not defined by any FPU flag. */
34592 arm_test_fpu_data (void)
34594 auto_sbitmap
isa_all_fpubits_internal (isa_num_bits
);
34595 auto_sbitmap
fpubits (isa_num_bits
);
34596 auto_sbitmap
tmpset (isa_num_bits
);
34598 static const enum isa_feature fpu_bitlist_internal
[]
34599 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
34600 arm_initialize_isa (isa_all_fpubits_internal
, fpu_bitlist_internal
);
34602 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
34604 arm_initialize_isa (fpubits
, all_fpus
[i
].isa_bits
);
34605 bitmap_and_compl (tmpset
, isa_all_fpubits_internal
, fpubits
);
34606 bitmap_clear (isa_all_fpubits_internal
);
34607 bitmap_copy (isa_all_fpubits_internal
, tmpset
);
34610 if (!bitmap_empty_p (isa_all_fpubits_internal
))
34612 fprintf (stderr
, "Error: found feature bits in the ALL_FPU_INTERAL"
34613 " group that are not defined by any FPU.\n"
34614 " Check your arm-cpus.in.\n");
34615 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal
));
34620 arm_run_selftests (void)
34622 arm_test_cpu_arch_data ();
34623 arm_test_fpu_data ();
34625 } /* Namespace selftest. */
34627 #undef TARGET_RUN_TARGET_SELFTESTS
34628 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34629 #endif /* CHECKING_P */
34631 /* Implement TARGET_STACK_PROTECT_GUARD. In case of a
34632 global variable based guard use the default else
34633 return a null tree. */
34635 arm_stack_protect_guard (void)
34637 if (arm_stack_protector_guard
== SSP_GLOBAL
)
34638 return default_stack_protect_guard ();
34643 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34644 Unlike the arm version, we do NOT implement asm flag outputs. */
34647 thumb1_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> & /*inputs*/,
34648 vec
<machine_mode
> & /*input_modes*/,
34649 vec
<const char *> &constraints
, vec
<rtx
> & /*clobbers*/,
34650 HARD_REG_SET
& /*clobbered_regs*/, location_t
/*loc*/)
34652 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
34653 if (startswith (constraints
[i
], "=@cc"))
34655 sorry ("%<asm%> flags not supported in thumb1 mode");
34661 /* Generate code to enable conditional branches in functions over 1 MiB.
34663 operands: is the operands list of the asm insn (see arm_cond_branch or
34664 arm_cond_branch_reversed).
34665 pos_label: is an index into the operands array where operands[pos_label] is
34666 the asm label of the final jump destination.
34667 dest: is a string which is used to generate the asm label of the intermediate
34669 branch_format: is a string denoting the intermediate branch format, e.g.
34670 "beq", "bne", etc. */
34673 arm_gen_far_branch (rtx
* operands
, int pos_label
, const char * dest
,
34674 const char * branch_format
)
34676 rtx_code_label
* tmp_label
= gen_label_rtx ();
34677 char label_buf
[256];
34679 ASM_GENERATE_INTERNAL_LABEL (label_buf
, dest
, \
34680 CODE_LABEL_NUMBER (tmp_label
));
34681 const char *label_ptr
= arm_strip_name_encoding (label_buf
);
34682 rtx dest_label
= operands
[pos_label
];
34683 operands
[pos_label
] = tmp_label
;
34685 snprintf (buffer
, sizeof (buffer
), "%s%s", branch_format
, label_ptr
);
34686 output_asm_insn (buffer
, operands
);
34688 snprintf (buffer
, sizeof (buffer
), "b\t%%l0%d\n%s:", pos_label
, label_ptr
);
34689 operands
[pos_label
] = dest_label
;
34690 output_asm_insn (buffer
, operands
);
34694 /* If given mode matches, load from memory to LO_REGS.
34695 (i.e [Rn], Rn <= LO_REGS). */
34697 arm_mode_base_reg_class (machine_mode mode
)
34699 if (TARGET_HAVE_MVE
34700 && (mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
))
34703 return MODE_BASE_REG_REG_CLASS (mode
);
34706 struct gcc_target targetm
= TARGET_INITIALIZER
;
34708 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34711 arm_get_mask_mode (machine_mode mode
)
34713 if (TARGET_HAVE_MVE
)
34714 return arm_mode_to_pred_mode (mode
);
34716 return default_get_mask_mode (mode
);
34719 /* Output assembly to read the thread pointer from the appropriate TPIDR
34720 register into DEST. If PRED_P also emit the %? that can be used to
34721 output the predication code. */
34724 arm_output_load_tpidr (rtx dst
, bool pred_p
)
34727 int tpidr_coproc_num
= -1;
34728 switch (target_thread_pointer
)
34731 tpidr_coproc_num
= 2;
34734 tpidr_coproc_num
= 3;
34737 tpidr_coproc_num
= 4;
34740 gcc_unreachable ();
34742 snprintf (buf
, sizeof (buf
),
34743 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
34744 pred_p
? "%?" : "", tpidr_coproc_num
);
34745 output_asm_insn (buf
, &dst
);
34749 #include "gt-arm.h"