/* Output routines for GCC for ARM.
   Copyright (C) 1991-2023 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch-common.h"
#include "aarch-common-protos.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node Mnode;
typedef struct minipool_fixup Mfix;
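/* Note: minipools are small literal pools emitted inline, close to the
   instructions that reference them, so that PC-relative loads stay
   within their limited offset range.  An Mnode is one pool entry; an
   Mfix records an instruction still waiting to be assigned one.  */

/* If set by the language front end, this hook is called when assembly
   output begins so that language-specific EABI object attributes can
   be emitted.  */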
void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static void arm_emit_multi_reg_pop (unsigned long);
static int vfp_emit_fstmd (int, int);
static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *, struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalign,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
					  rtx, const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx,
				       rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
				       vec<machine_mode> &,
				       vec<const char *> &, vec<rtx> &,
				       vec<rtx> &, HARD_REG_SET &, location_t);
static const char *arm_identify_fpu_from_isa (sbitmap);
/* Table of machine attributes.  */
static const attribute_spec arm_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, false, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
};
static const scoped_attribute_specs arm_gnu_attribute_table =
{
  "gnu", { arm_gnu_attributes }
};

static const scoped_attribute_specs *const arm_attribute_table[] =
{
  &arm_gnu_attribute_table
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
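/* Spelled out: offsets -4088 through +4095 give 4088 + 1 + 4095
   = 8184 = 8 * 1023 addressable bytes per anchor, and 4095 matches the
   12-bit immediate offset of an ARM-state LDR/STR.  */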
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
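/* A Thumb-2 IT instruction can predicate at most four subsequent
   instructions.  With -mrestrict-it (reflecting the ARMv8 deprecation
   of more complex IT blocks) only one instruction goes in a block.  */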
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision
/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
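/* The value 2 tells the middle end that the low two bits of a code
   address are spare, so bit 1 can flag a custom function descriptor
   while bit 0 keeps selecting ARM vs. Thumb state.  */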
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;
/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */
struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;
/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;
/* Nonzero if this chip supports the ARM Architecture 8-M Mainline
   Extensions.  */
int arm_arch8m_main = 0;

/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   Extensions.  */
int arm_arch8_1m_main = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;
/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* Nonzero if chip supports the Custom Datapath Extension.  */
int arm_arch_cde = 0;
int arm_arch_cde_coproc = 0;
const int arm_arch_cde_coproc_bits[] = {
  0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
};
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
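/* The table of FP system register names below is built with the X-macro
   idiom: DEF_FP_SYSREG is temporarily defined to stringize a register
   name, the FP_SYSREGS list is expanded once to populate the array, and
   the helper macro is undefined again afterwards.  */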
#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] = {
  FP_SYSREGS
};
#undef DEF_FP_SYSREG
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)
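/* Mask of the low registers (r0-r7) that Thumb-2 code can use as work
   registers: everything except the hard frame pointer, the stack
   pointer, the program counter and, when one is in use, the PIC offset
   table register.  */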
#define THUMB2_WORK_REGS				\
  (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM)	\
	    | (1 << SP_REGNUM)				\
	    | (1 << PC_REGNUM)				\
	    | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
	       ? (1 << PIC_OFFSET_TABLE_REGNUM)		\
	       : 0)))
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {				\
    num_slots,			\
    l1_size,			\
    l1_line_size		\
  }
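/* The three fields are the number of prefetch slots and the L1 cache
   and cache-line sizes; -1 leaves a value unspecified, so
   ARM_PREFETCH_NOT_BENEFICIAL simply claims no usable prefetch slots.  */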
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
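/* With every operation given the same unit cost, this generic model
   makes vectorization profitability depend mostly on statement counts;
   only taken branches are penalised (cost 3 vs. 1).  */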
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
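/* The entries in the per-core tables below are extra costs on top of a
   baseline instruction, expressed in units of COSTS_N_INSNS:
   COSTS_N_INSNS (2) means "as expensive as two simple instructions",
   while 0 marks an operation as no dearer than the baseline.  */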
const struct cpu_cost_table cortexa9_extra_costs =
{
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (3),	/* simple.  */
    COSTS_N_INSNS (3),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (3),	/* add.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (4),	/* extend.  */
    COSTS_N_INSNS (4),	/* extend_add.  */
    COSTS_N_INSNS (2),	/* load.  */
    COSTS_N_INSNS (2),	/* load_sign_extend.  */
    COSTS_N_INSNS (2),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* loadf.  */
    COSTS_N_INSNS (5),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (2),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (14),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (1),	/* fpconst.  */
    COSTS_N_INSNS (1),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (24),	/* div.  */
    COSTS_N_INSNS (5),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (1),	/* fpconst.  */
    COSTS_N_INSNS (1),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
};
const struct cpu_cost_table cortexa8_extra_costs =
{
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    0,			/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    0,			/* log_shift_reg.  */
    0,			/* extend_arith.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* simple.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (1),	/* extend_add.  */
    COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (1),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* loadf.  */
    COSTS_N_INSNS (1),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (1),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (36),	/* div.  */
    COSTS_N_INSNS (11),	/* mult.  */
    COSTS_N_INSNS (20),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (9),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (6),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (8),	/* toint.  */
    COSTS_N_INSNS (8),	/* fromint.  */
    COSTS_N_INSNS (8)	/* roundint.  */
    COSTS_N_INSNS (64),	/* div.  */
    COSTS_N_INSNS (16),	/* mult.  */
    COSTS_N_INSNS (25),	/* mult_addsub.  */
    COSTS_N_INSNS (30),	/* fma.  */
    COSTS_N_INSNS (9),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (6),	/* compare.  */
    COSTS_N_INSNS (6),	/* widen.  */
    COSTS_N_INSNS (6),	/* narrow.  */
    COSTS_N_INSNS (8),	/* toint.  */
    COSTS_N_INSNS (8),	/* fromint.  */
    COSTS_N_INSNS (8)	/* roundint.  */
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
};
const struct cpu_cost_table cortexa5_extra_costs =
{
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (1),	/* extend_add.  */
    COSTS_N_INSNS (7)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (6),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (15),	/* div.  */
    COSTS_N_INSNS (3),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (30),	/* div.  */
    COSTS_N_INSNS (6),	/* mult.  */
    COSTS_N_INSNS (10),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
};
const struct cpu_cost_table cortexa7_extra_costs =
{
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (1),	/* extend_add.  */
    COSTS_N_INSNS (7)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (2),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (15),	/* div.  */
    COSTS_N_INSNS (3),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (30),	/* div.  */
    COSTS_N_INSNS (6),	/* mult.  */
    COSTS_N_INSNS (10),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (3),	/* fpconst.  */
    COSTS_N_INSNS (3),	/* neg.  */
    COSTS_N_INSNS (3),	/* compare.  */
    COSTS_N_INSNS (3),	/* widen.  */
    COSTS_N_INSNS (3),	/* narrow.  */
    COSTS_N_INSNS (3),	/* toint.  */
    COSTS_N_INSNS (3),	/* fromint.  */
    COSTS_N_INSNS (3)	/* roundint.  */
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
};
const struct cpu_cost_table cortexa12_extra_costs =
{
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (2),	/* simple.  */
    COSTS_N_INSNS (3),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (3),	/* add.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (18)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (3),	/* extend.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (3),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    0,			/* load_unaligned.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (17),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (2),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (31),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (2),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
};
const struct cpu_cost_table cortexa15_extra_costs =
{
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    true		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (2),	/* simple.  */
    COSTS_N_INSNS (3),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (2),	/* add.  */
    COSTS_N_INSNS (2),	/* extend_add.  */
    COSTS_N_INSNS (18)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (3),	/* extend.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (4),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    0,			/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (17),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (5),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (31),	/* div.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (8),	/* mult_addsub.  */
    COSTS_N_INSNS (8),	/* fma.  */
    COSTS_N_INSNS (4),	/* addsub.  */
    COSTS_N_INSNS (2),	/* fpconst.  */
    COSTS_N_INSNS (2),	/* neg.  */
    COSTS_N_INSNS (2),	/* compare.  */
    COSTS_N_INSNS (4),	/* widen.  */
    COSTS_N_INSNS (4),	/* narrow.  */
    COSTS_N_INSNS (4),	/* toint.  */
    COSTS_N_INSNS (4),	/* fromint.  */
    COSTS_N_INSNS (4)	/* roundint.  */
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
};
const struct cpu_cost_table v7m_extra_costs =
{
    0,			/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */
    COSTS_N_INSNS (1),	/* simple.  */
    COSTS_N_INSNS (1),	/* flag_setting.  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (1),	/* add.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (8)	/* idiv.  */
    0,			/* simple (N/A).  */
    0,			/* flag_setting (N/A).  */
    COSTS_N_INSNS (2),	/* extend.  */
    COSTS_N_INSNS (3),	/* extend_add.  */
    COSTS_N_INSNS (2),	/* load.  */
    0,			/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    1,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    1,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (3),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
    COSTS_N_INSNS (7),	/* div.  */
    COSTS_N_INSNS (2),	/* mult.  */
    COSTS_N_INSNS (5),	/* mult_addsub.  */
    COSTS_N_INSNS (3),	/* fma.  */
    COSTS_N_INSNS (1),	/* addsub.  */
    COSTS_N_INSNS (15),	/* div.  */
    COSTS_N_INSNS (5),	/* mult.  */
    COSTS_N_INSNS (7),	/* mult_addsub.  */
    COSTS_N_INSNS (7),	/* fma.  */
    COSTS_N_INSNS (3),	/* addsub.  */
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
};
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  3,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  3,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  3,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  32,				/* Memset max inline.  */
  4,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  1,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,				/* Constant limit.  */
  1,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_GD32_FDPIC,
  TLS_LDM32,
  TLS_LDM32_FDPIC,
  TLS_LDO32,
  TLS_IE32,
  TLS_IE32_FDPIC,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
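/* A worked illustration of the clearing trick used above (hypothetical
   input, added for exposition): for value = 0b101100 the loop iterates
     0b101100 & 0b101011 = 0b101000	(count = 1)
     0b101000 & 0b100111 = 0b100000	(count = 2)
     0b100000 & 0b011111 = 0b000000	(count = 3)
   i.e. once per set bit rather than once per bit position.  */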
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
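/* An illustrative example (hypothetical operands, not from the original
   source): a call such as

     arm_set_fixed_optab_libfunc (ssadd_optab, E_SQmode, "ssadd", "sq", 3);

   builds the name "__gnu_ssaddsq3" and registers it as the saturating
   signed-add helper for SQmode; a NUM_SUFFIX of 0 would omit the
   trailing operand count.  */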
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
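/* An illustrative example (hypothetical operands): converting the signed
   fract mode SQ to DQ satisfies all three conditions above, so

     arm_set_fixed_conv_libfunc (fract_optab, E_DQmode, E_SQmode,
				 "fract", "dq", "sq");

   registers "__gnu_fractsqdq2" -- FUNCNAME, then FROMNAME, then TONAME,
   then the "2" suffix.  */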
static GTY(()) rtx speculation_barrier_libfunc;
/* Record that we have no arithmetic or comparison libfuncs for
   machine mode MODE.  */

static void
arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
{
  /* Arithmetic.  */
  set_optab_libfunc (add_optab, mode, NULL);
  set_optab_libfunc (sdiv_optab, mode, NULL);
  set_optab_libfunc (smul_optab, mode, NULL);
  set_optab_libfunc (neg_optab, mode, NULL);
  set_optab_libfunc (sub_optab, mode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, mode, NULL);
  set_optab_libfunc (ne_optab, mode, NULL);
  set_optab_libfunc (lt_optab, mode, NULL);
  set_optab_libfunc (le_optab, mode, NULL);
  set_optab_libfunc (ge_optab, mode, NULL);
  set_optab_libfunc (gt_optab, mode, NULL);
  set_optab_libfunc (unord_optab, mode, NULL);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  machine_mode mode_iter;

  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
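  /* A usage note (illustrative, not from the original source): with the
     registrations above in place, a soft-float DFmode addition such as
     "a + b" expands to a libcall "bl __aeabi_dadd", with the first
     operand passed in r0/r1 and the second in r2/r3 per the AAPCS, and
     the result returned in r0/r1.  */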
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
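  /* An illustrative sketch of the contract described above (the struct
     and typedef names are hypothetical, not part of GCC or the ABI).
     In C terms the SImode divmod helper behaves like

       typedef struct { int quot; int rem; } idivmod_result;
       idivmod_result __aeabi_idivmod (int num, int den);

     returning the quotient in r0 and the remainder in r1, so e.g.
     __aeabi_idivmod (7, 3) leaves 2 in r0 and 1 in r1.  A caller that
     wants plain division simply ignores r1, which is why the DImode
     sdiv_optab above can be backed by __aeabi_ldivmod directly.  */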
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      arm_block_arith_comp_libfuncs_for_mode (HFmode);
      break;

    default:
      break;
    }

  /* For all possible libcalls in BFmode, record NULL.  */
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
    {
      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
    }
  arm_block_arith_comp_libfuncs_for_mode (BFmode);
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" }
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" },
      { E_QImode, "qi" },
      { E_HImode, "hi" },
      { E_SImode, "si" },
      { E_DImode, "di" },
      { E_SFmode, "sf" },
      { E_DFmode, "df" }
    };
  unsigned int i, j;
  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* Implement TARGET_GIMPLE_FOLD_BUILTIN.  */
static bool
arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
  tree fndecl = gimple_call_fndecl (stmt);
  unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
  unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
  gimple *new_stmt = NULL;
  switch (code & ARM_BUILTIN_CLASS)
    {
    case ARM_BUILTIN_GENERAL:
      break;
    case ARM_BUILTIN_MVE:
      new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
      break;
    }

  if (!new_stmt)
    return false;

  gsi_replace (gsi, new_stmt, true);
  return true;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
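/* In effect (an illustrative rendering only -- the real type is built
   through the tree machinery above, not from source text), AAPCS
   targets behave as if <stdarg.h> provided

     typedef struct __va_list { void *__ap; } va_list;

   which is why both the tag "__va_list" and the field "__ap" are
   ABI-visible.  */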
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
	 MOVT.  */
      if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets.  */
      if (target_pure_code && common_unsupported_modes)
	error ("%s only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }
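  /* Checking the arithmetic in the Thumb-2 comment above: the anchor can
     reach [-248, 4095], so the block it covers spans
     248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, which is what keeps
     consecutive anchors naturally 8-byte spaced.  The Thumb-1 limit of
     127 likewise matches the 7-bit (2^7 - 1) word-load range noted
     above.  */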
  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts,
				     struct gcc_options *opts_set)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}
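/* For example, compiling with "-mthumb -Os -falign-functions" behaves
   like "-falign-functions=2", while ARM state (or optimizing for speed)
   gets the equivalent of "-falign-functions=4".  */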
/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options */* opts */,
		    struct gcc_options */* opts_set */,
		    struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, false);
  arm_option_reconfigure_globals ();
}
3121 arm_option_override_internal (struct gcc_options
*opts
,
3122 struct gcc_options
*opts_set
)
3124 arm_override_options_after_change_1 (opts
, opts_set
);
3126 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3128 /* The default is to enable interworking, so this warning message would
3129 be confusing to users who have just compiled with
3130 eg, -march=armv4. */
3131 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3132 opts
->x_target_flags
&= ~MASK_INTERWORK
;
3135 if (TARGET_THUMB_P (opts
->x_target_flags
)
3136 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3138 warning (0, "target CPU does not support THUMB instructions");
3139 opts
->x_target_flags
&= ~MASK_THUMB
;
3142 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
3144 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3145 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
3148 /* Callee super interworking implies thumb interworking. Adding
3149 this to the flags here simplifies the logic elsewhere. */
3150 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
3151 opts
->x_target_flags
|= MASK_INTERWORK
;
3153 /* need to remember initial values so combinaisons of options like
3154 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3155 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
3157 if (! opts_set
->x_arm_restrict_it
)
3158 opts
->x_arm_restrict_it
= arm_arch8
;
3160 /* ARM execution state and M profile don't have [restrict] IT. */
3161 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
3162 opts
->x_arm_restrict_it
= 0;
3164 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3165 if (!opts_set
->x_arm_restrict_it
3166 && (opts_set
->x_arm_cpu_string
|| opts_set
->x_arm_tune_string
))
3167 opts
->x_arm_restrict_it
= 0;
3169 /* Enable -munaligned-access by default for
3170 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3171 i.e. Thumb2 and ARM state only.
3172 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3173 - ARMv8 architecture-base processors.
3175 Disable -munaligned-access by default for
3176 - all pre-ARMv6 architecture-based processors
3177 - ARMv6-M architecture-based processors
3178 - ARMv8-M Baseline processors. */
3180 if (! opts_set
->x_unaligned_access
)
3182 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
3183 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
3185 else if (opts
->x_unaligned_access
== 1
3186 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3188 warning (0, "target CPU does not support unaligned accesses");
3189 opts
->x_unaligned_access
= 0;
3192 /* Don't warn since it's on by default in -O2. */
3193 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3194 opts
->x_flag_schedule_insns
= 0;
3196 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3198 /* Disable shrink-wrap when optimizing function for size, since it tends to
3199 generate additional returns. */
3200 if (optimize_function_for_size_p (cfun
)
3201 && TARGET_THUMB2_P (opts
->x_target_flags
))
3202 opts
->x_flag_shrink_wrap
= false;
3204 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3206 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3207 - epilogue_insns - does not accurately model the corresponding insns
3208 emitted in the asm file. In particular, see the comment in thumb_exit
3209 'Find out how many of the (return) argument registers we can corrupt'.
3210 As a consequence, the epilogue may clobber registers without fipa-ra
3211 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3212 TODO: Accurately model clobbers for epilogue_insns and reenable
3214 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3215 opts
->x_flag_ipa_ra
= 0;
3217 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3219 /* Thumb2 inline assembly code should always use unified syntax.
3220 This will apply to ARM and Thumb1 eventually. */
3221 if (TARGET_THUMB2_P (opts
->x_target_flags
))
3222 opts
->x_inline_asm_unified
= true;
3224 if (arm_stack_protector_guard
== SSP_GLOBAL
3225 && opts
->x_arm_stack_protector_guard_offset_str
)
3227 error ("incompatible options %<-mstack-protector-guard=global%> and "
3228 "%<-mstack-protector-guard-offset=%s%>",
3229 arm_stack_protector_guard_offset_str
);
3232 if (opts
->x_arm_stack_protector_guard_offset_str
)
3235 const char *str
= arm_stack_protector_guard_offset_str
;
3237 long offs
= strtol (arm_stack_protector_guard_offset_str
, &end
, 0);
3238 if (!*str
|| *end
|| errno
)
3239 error ("%qs is not a valid offset in %qs", str
,
3240 "-mstack-protector-guard-offset=");
3241 arm_stack_protector_guard_offset
= offs
;
3244 if (arm_current_function_pac_enabled_p ())
3246 if (!arm_arch8m_main
)
3247 error ("This architecture does not support branch protection "
3249 if (TARGET_TPCS_FRAME
)
3250 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3253 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3254 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;
3262 /* Configure a build target TARGET from the user-specified options OPTS and
3263 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3264 architecture have been specified, but the two are not identical. */
3266 arm_configure_build_target (struct arm_build_target
*target
,
3267 struct cl_target_option
*opts
,
3268 bool warn_compatible
)
3270 const cpu_option
*arm_selected_tune
= NULL
;
3271 const arch_option
*arm_selected_arch
= NULL
;
3272 const cpu_option
*arm_selected_cpu
= NULL
;
3273 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3274 const char *tune_opts
= NULL
;
3275 const char *arch_opts
= NULL
;
3276 const char *cpu_opts
= NULL
;
3278 bitmap_clear (target
->isa
);
3279 target
->core_name
= NULL
;
3280 target
->arch_name
= NULL
;
3282 if (opts
->x_arm_arch_string
)
3284 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3286 opts
->x_arm_arch_string
);
3287 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3290 if (opts
->x_arm_cpu_string
)
3292 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3293 opts
->x_arm_cpu_string
);
3294 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3295 arm_selected_tune
= arm_selected_cpu
;
3296 /* If taking the tuning from -mcpu, we don't need to rescan the
3297 options for tuning. */
3300 if (opts
->x_arm_tune_string
)
3302 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3303 opts
->x_arm_tune_string
);
3304 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3307 if (opts
->x_arm_branch_protection_string
)
3309 aarch_validate_mbranch_protection (opts
->x_arm_branch_protection_string
,
3310 "-mbranch-protection=");
3312 if (aarch_ra_sign_key
!= AARCH_KEY_A
)
3314 warning (0, "invalid key type for %<-mbranch-protection=%>");
3315 aarch_ra_sign_key
= AARCH_KEY_A
;
3319 if (arm_selected_arch
)
3321 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3322 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3325 if (arm_selected_cpu
)
3327 auto_sbitmap
cpu_isa (isa_num_bits
);
3328 auto_sbitmap
isa_delta (isa_num_bits
);
3330 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3331 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3333 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3334 /* Ignore any bits that are quirk bits. */
3335 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3336 /* If the user (or the default configuration) has specified a
3337 specific FPU, then ignore any bits that depend on the FPU
3338 configuration. Do similarly if using the soft-float
3340 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
3341 || arm_float_abi
== ARM_FLOAT_ABI_SOFT
)
3342 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpbits
);
3344 if (!bitmap_empty_p (isa_delta
))
3346 if (warn_compatible
)
3347 warning (0, "switch %<-mcpu=%s%> conflicts "
3348 "with switch %<-march=%s%>",
3349 opts
->x_arm_cpu_string
,
3350 opts
->x_arm_arch_string
);
3352 /* -march wins for code generation.
3353 -mcpu wins for default tuning. */
3354 if (!arm_selected_tune
)
3355 arm_selected_tune
= arm_selected_cpu
;
3357 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3358 target
->arch_name
= arm_selected_arch
->common
.name
;
3362 /* Architecture and CPU are essentially the same.
3363 Prefer the CPU setting. */
3364 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3365 target
->core_name
= arm_selected_cpu
->common
.name
;
3366 /* Copy the CPU's capabilities, so that we inherit the
3367 appropriate extensions and quirks. */
3368 bitmap_copy (target
->isa
, cpu_isa
);
3373 /* Pick a CPU based on the architecture. */
3374 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3375 target
->arch_name
= arm_selected_arch
->common
.name
;
3376 /* Note: target->core_name is left unset in this path. */
3379 else if (arm_selected_cpu
)
3381 target
->core_name
= arm_selected_cpu
->common
.name
;
3382 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3383 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3385 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
      /* If the user did not specify a processor or architecture, choose
	 one for them.  */
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      /* This should clear out ALL bits relating to the FPU/simd
	 extensions, to avoid potentially invalid combinations later on
	 that we can't match.  At present we only clear out those bits
	 that can be set by -mfpu.  This should be fixed in GCC-12.  */
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  /* If we have the soft-float ABI, clear any feature bits relating to use of
     floating-point operations.  They'll just confuse things later on.  */
  if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
    bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);

  /* There may be implied bits which we still need to enable.  These are
     non-named features which are needed to complete other sets of features,
     but cannot be enabled from arm-cpus.in due to being shared between
     multiple fgroups.  Each entry in all_implied_fbits is of the form
     ante -> cons, meaning that if the feature "ante" is enabled, we should
     implicitly enable "cons".  */
  const struct fbit_implication *impl = all_implied_fbits;
  for (; impl->ante; impl++)
    if (bitmap_bit_p (target->isa, impl->ante))
      bitmap_set_bit (target->isa, impl->cons);

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  if (!target->arch_name)
    target->arch_name = arm_selected_arch->common.name;
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!OPTION_SET_P (arm_fpu_index))
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options, &global_options_set);
  arm_configure_build_target (&arm_active_target, &opts, true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!OPTION_SET_P (arm_structure_size_boundary))
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!OPTION_SET_P (arm_pic_data_is_text_relative))
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_FDPIC)
    target_word_relocations = 1;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable fix_vlldm by default if required.  */
  if (fix_vlldm == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
	fix_vlldm = 1;
      else
	fix_vlldm = 0;
    }

  /* Enable fix_aes by default if required.  */
  if (fix_aes_erratum_1742098 == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
	fix_aes_erratum_1742098 = 1;
      else
	fix_aes_erratum_1742098 = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_gcse_unrestricted_cost, 2);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_simultaneous_prefetches,
			 current_tune->prefetch.num_slots);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_line_size,
			 current_tune->prefetch.l1_cache_line_size);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    {
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
    }
  else
    {
      /* For a generic ARM target, JF Bastien proposed using 64 for both.  */
      /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
	 constructive interference?  */
      /* More recent Cortex chips have a 64-byte cache line, but are marked
	 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size, 64);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size, 64);
    }

  if (current_tune->prefetch.l1_cache_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_size,
			 current_tune->prefetch.l1_cache_size);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_sched_autopref_queue_depth,
		       sched_autopref_queue_depth);

  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options,
					   &global_options_set);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options, &global_options_set);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}

/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
				    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch8m_main = arm_arch7 && arm_arch_cmse;
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  arm_arch_cde = 0;
  arm_arch_cde_coproc = 0;
  int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
		    isa_bit_cdecp6, isa_bit_cdecp7};
  for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
    {
      int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
      if (cde_bit)
	{
	  arm_arch_cde |= cde_bit;
	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
	}
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_TPIDRURO;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
    error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
}
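
/* Worked example for the TP_AUTO case above: with -mtp=auto, an ARMv6K
   (or later) non-Thumb-1 target reads the thread pointer from the CP15
   TPIDRURO register, while older cores fall back to calling the
   __aeabi_read_tp helper.  */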

/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("%<__fp16%> and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
	    error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}

/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
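
/* For example, a handler declared as

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   has the string "IRQ" looked up in the table above, yielding
   ARM_FT_ISR.  */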

/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}

/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
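
/* For example, a function declared as

     void __attribute__ ((naked)) start (void);

   gets no prologue or epilogue, no stack slots for its arguments, and no
   missing-return warning: its body is expected to be written entirely in
   inline assembly.  */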

/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}

/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  maybe_emit_call_builtin___clear_cache (a_tramp,
					 plus_constant (ptr_mode,
							a_tramp,
							TRAMPOLINE_SIZE));
}

/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}

/* Return 1 if REG needs to be saved.  For interrupt handlers, this
   includes call-clobbered registers too.  If this is a leaf function
   we can just examine the registers used by the RTL, but otherwise we
   have to assume that whatever function is called might clobber
   anything, and so we have to save all the call-clobbered registers
   as well.  */
static inline bool reg_needs_saving_p (unsigned reg)
{
  unsigned long func_type = arm_current_func_type ();

  if (IS_INTERRUPT (func_type))
    if (df_regs_ever_live_p (reg)
	/* Save call-clobbered core registers.  */
	|| (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)
	    && reg < FIRST_VFP_REGNUM))
      return true;
    else
      return false;
  else
    if (!df_regs_ever_live_p (reg)
	|| call_used_or_fixed_reg_p (reg))
      return false;
    else
      return true;
}

/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  /* Never use a return instruction when return address signing
     mechanism is enabled as it requires more than one
     instruction.  */
  if (arm_current_function_pac_enabled_p ())
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  Armv8.1-M Mainline
     also needs several instructions to save and restore FP context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_VFP_BASE)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  return 1;
}
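
/* For example, a function whose only epilogue work is restoring the saved
   core registers (LR included) can return with the single instruction

     ldmfd	sp!, {r4, r5, pc}

   whereas any of the cases rejected above would force a multi-insn
   epilogue.  */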

/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}

/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
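
/* Illustration only (not used by the compiler): a self-contained sketch
   of the classic A32 immediate test above, assuming "unsigned int" is
   exactly 32 bits wide.  A value is encodable iff some even left-rotation
   leaves only the low eight bits set.  */
#if 0
static bool
a32_immediate_p (unsigned int value)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotate VALUE left by ROT bits; ROT == 0 is special-cased to
	 avoid an undefined shift by 32.  */
      unsigned int r = rot ? ((value << rot) | (value >> (32 - rot))) : value;
      if ((r & ~0xffu) == 0)
	return true;
    }
  return false;
}
#endif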

/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
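
/* For example, AND with 0xfffffff0 is not directly encodable (28 bits
   set), but its complement 0xf is, so the operation can be emitted as
   BIC rd, rn, #15.  */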

/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	|| const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}

/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}

/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}

/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;

	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}

/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}

/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12
      */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.  */
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}
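/* Editorial sketch, not part of GCC: the lo/hi split used above, shown
   host-side with a 64-bit integer standing in for HOST_WIDE_INT.  Kept
   under #if 0 so it is never compiled.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t op = 0x00ff000000000001ULL;
  uint32_t lo = op & 0xffffffffULL;	/* UINTVAL (op) & 0xffffffffULL  */
  uint32_t hi = op >> 32;		/* UINTVAL (op) >> 32  */
  assert (lo == 0x00000001u && hi == 0x00ff0000u);
  return 0;
}
#endif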
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
     ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
     either reversed or (for constant OP1) adjusted to GE/LT.
     Similarly for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval)
		    {
		      /* Try to convert to GE/LT, unless that would be more
			 expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			break;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }

		  /* GT maxval is always false, LE maxval is always true.
		     We can't fold that away here as we must make a
		     comparison, but we can fold them to comparisons
		     with the same result that can be handled:
			op0 GT maxval -> op0 LT minval
			op0 LE maxval -> op0 GE minval
		     where minval = (-maxval - 1).  */
		  *op1 = GEN_INT (-maxval - 1);
		  *code = *code == GT ? LT : GE;
		  return;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0))
		    {
		      /* Try to convert to GEU/LTU, unless that would
			 be more expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			break;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }

		  /* GTU ~0 is always false, LEU ~0 is always true.
		     We can't fold that away here as we must make a
		     comparison, but we can fold them to comparisons
		     with the same result that can be handled:
			op0 GTU ~0 -> op0 LTU 0
			op0 LEU ~0 -> op0 GEU 0.  */
		  *op1 = const0_rtx;
		  *code = *code == GTU ? LTU : GEU;
		  return;

		default:
		  gcc_unreachable ();
		}
	    }

	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int) swap_condition ((enum rtx_code) *code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
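/* Editorial sketch, not part of GCC: the adjustments above rely on
   identities such as  x > c  <=>  x >= c + 1  (for c below the type
   maximum), which let an out-of-range immediate be replaced by a
   neighbouring one that const_ok_for_arm accepts.  Host-side check,
   kept under #if 0 so it is never compiled.  */
#if 0
#include <assert.h>

int
main (void)
{
  int c = 255;
  for (int x = -1000; x < 1000; x++)
    {
      assert ((x > c) == (x >= c + 1));		/* GT -> GE  */
      assert ((x <= c) == (x < c + 1));		/* LE -> LT  */
    }
  return 0;
}
#endif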
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
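/* Editorial sketch, not part of GCC: the big-endian MSB promotion above
   rounds a small struct's size up to a whole number of words before an
   integer mode is chosen.  Host-side demonstration assuming a
   hypothetical 4-byte word; kept under #if 0 so it is never compiled.  */
#if 0
#include <assert.h>

int
main (void)
{
  const int units_per_word = 4;		/* hypothetical stand-in  */
  int size = 6;				/* e.g. a 6-byte packed struct  */
  if (size % units_per_word != 0)
    size += units_per_word - size % units_per_word;
  assert (size == 8);			/* promoted to two full words  */
  return 0;
}
#endif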
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers except in case of MVE, because in
	 MVE we will be using the hard-float ABI on a CPU which doesn't support
	 single-precision operations in hardware.  In MVE the following check
	 enables use of emulation for the single-precision arithmetic
	 operations.  */
      if (TARGET_HAVE_MVE)
	{
	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
	}
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      /* NOTE: This code is deprecated and has not been updated to handle
	 DECL_FIELD_ABI_IGNORED.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
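/* Editorial sketch, not part of GCC: examples of how the deprecated APCS
   record rule above classifies a few C types.  Hypothetical names, kept
   under #if 0 so they are never compiled.  */
#if 0
struct in_reg1 { int x; };		/* one member: register  */
struct in_reg2 { int x; int y : 8; };	/* trailing bitfield only: register  */
struct in_mem1 { float f; };		/* float first member: memory  */
struct in_mem2 { int x; int y; };	/* second addressable member: memory  */
#endif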
static const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
#if 0
      /* Unfortunately, this is not safe and can lead to wrong code
	 being generated (PR96882).  Not all calls into the back-end
	 pass the DECL, so it is unsafe to make any PCS-changing
	 decisions based on it.  In particular the RETURN_IN_MEMORY
	 hook is only ever passed a TYPE.  This needs revisiting to
	 see if there are any partial improvements that can be
	 re-enabled.  */
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_node *local_info_node
	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
	  if (local_info_node && local_info_node->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
#endif
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}

/* Bitmasks that indicate whether earlier versions of GCC would have
   taken a different path through the ABI logic.  This should result in
   a -Wpsabi warning if the earlier path led to a different ABI decision.

   WARN_PSABI_EMPTY_CXX17_BASE
      Indicates that the type includes an artificial empty C++17 base field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  See PR94711 for details.

   WARN_PSABI_NO_UNIQUE_ADDRESS
      Indicates that the type includes an empty [[no_unique_address]] field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  */
const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.

   The WARN_PSABI_FLAGS argument allows the caller to check whether this
   function has changed its behavior relative to earlier versions of GCC.
   Normally the argument should be nonnull and point to a zero-initialized
   variable.  The function then records whether the ABI decision might
   be affected by a known fix to the ABI logic, setting the associated
   WARN_PSABI_* bits if so.

   When the argument is instead a null pointer, the function tries to
   simulate the behavior of GCC before all such ABI fixes were made.
   This is useful to check whether the function returns something
   different after the ABI fixes.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
			 unsigned int *warn_psabi_flags)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
					 warn_psabi_flags);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    if (DECL_FIELD_ABI_IGNORED (field))
	      {
		/* See whether this is something that earlier versions of
		   GCC failed to ignore.  */
		unsigned int flag;
		if (lookup_attribute ("no_unique_address",
				      DECL_ATTRIBUTES (field)))
		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
		else if (cxx17_empty_base_field_p (field))
		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
		else
		  /* No compatibility problem.  */
		  continue;

		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= flag;
		    continue;
		  }
	      }
	    /* A zero-width bitfield may affect layout in some
	       circumstances, but adds no members.  The determination
	       of whether or not a type is an HFA is performed after
	       layout is complete, so if the type still looks like an
	       HFA afterwards, it is still classed as one.  This is
	       potentially an ABI break for the hard-float ABI.  */
	    else if (DECL_BIT_FIELD (field)
		     && integer_zerop (DECL_SIZE (field)))
	      {
		/* Prior to GCC-12 these fields were stripped early,
		   hiding them from the back-end entirely and
		   resulting in the correct behaviour for argument
		   passing.  Simulate that old behaviour without
		   generating a warning.  */
		if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
		  continue;
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
		    continue;
		  }
	      }

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
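/* Editorial sketch, not part of GCC: what the walk above computes for a
   few C types (hypothetical names, kept under #if 0).  The caller treats
   a count of 1-4 identical base elements as an HFA/HVA candidate.  */
#if 0
struct hfa3 { float a, b, c; };		/* count 3, base mode SFmode  */
struct hfa2 { double d[2]; };		/* count 2, base mode DFmode  */
struct not_hfa { float a; double b; };	/* mixed base modes: returns -1  */
struct too_big { float f[5]; };		/* count 5 > 4: rejected by caller  */
#endif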
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 %<hard-float%> VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      unsigned int warn_psabi_flags = 0;
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
					      &warn_psabi_flags);
      if (ag_count > 0 && ag_count <= 4)
	{
	  static unsigned last_reported_type_uid;
	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
	  int alt;
	  if (warn_psabi
	      && warn_psabi_flags
	      && uid != last_reported_type_uid
	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
		  != ag_count))
	    {
	      const char *url10
		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
	      const char *url12
		= CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
	      gcc_assert (alt == -1);
	      last_reported_type_uid = uid;
	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
		 qualification.  */
	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
		inform (input_location, "parameter passing for argument of "
			"type %qT with %<[[no_unique_address]]%> members "
			"changed %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
		inform (input_location, "parameter passing for argument of "
			"type %qT when C++17 is enabled changed to match "
			"C++14 %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
		inform (input_location, "parameter passing for argument of "
			"type %qT changed %{in GCC 12.1%}",
			TYPE_MAIN_VARIANT (type), url12);
	    }
	  *count = ag_count;
	}
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
	   type);

  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
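/* Editorial sketch, not part of GCC: the register search above scans the
   free-register bitmask for the lowest suitably aligned run, which is
   what implements AAPCS-VFP back-filling.  Host-side sketch, kept under
   #if 0 so it is never compiled.  */
#if 0
#include <assert.h>

/* Find the lowest SHIFT-aligned position in FREE_REGS covering MASK.  */
static int
find_vfp_slot (unsigned free_regs, unsigned mask, int shift, int num_regs)
{
  for (int regno = 0; regno < num_regs; regno += shift)
    if (((free_regs >> regno) & mask) == mask)
      return regno;
  return -1;
}

int
main (void)
{
  /* With s0 already taken (bit 0 clear), a double (mask 3, shift 2)
     lands on d1, i.e. s2/s3.  */
  assert (find_vfp_slot (0xfffe, 0x3, 2, 16) == 2);
  return 0;
}
#endif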
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!(TARGET_NEON || TARGET_HAVE_MVE))
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2 (mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
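/* Editorial sketch, not part of GCC: rules C.3-C.5 above, reduced to a
   host-side function for a doubleword-aligned argument of NREGS words.
   Hypothetical helper, kept under #if 0 so it is never compiled.  */
#if 0
#include <assert.h>

enum { N_ARG_REGS = 4 };	/* r0-r3  */

static void
layout (int *ncrn, int nregs, int dw_aligned, int *reg, int *partial)
{
  if (dw_aligned && (*ncrn & 1))
    ++*ncrn;				/* C.3: round up to even  */
  if (*ncrn + nregs <= N_ARG_REGS)
    {
      *reg = *ncrn, *partial = 0;	/* C.4: all in core registers  */
      *ncrn += nregs;
    }
  else if (*ncrn < N_ARG_REGS)
    {
      *reg = *ncrn;			/* C.5: split registers/stack  */
      *partial = (N_ARG_REGS - *ncrn) * 4;
      *ncrn = N_ARG_REGS;
    }
  else
    *reg = -1, *partial = 0;		/* C.6-C.8: all on the stack  */
}

int
main (void)
{
  int ncrn = 1, reg, partial;
  layout (&ncrn, 2, 1, &reg, &partial);	/* 8-byte pair after one int arg  */
  assert (reg == 2 && partial == 0 && ncrn == 4);
  return 0;
}
#endif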
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.

     Note that we explicitly consider zero-sized fields here, even though
     they don't map to AAPCS machine types.  For example, in:

	 struct __attribute__((aligned(8))) empty {};

	 struct s {
	   [[no_unique_address]] empty e;
	   int x;
	 };

     "s" contains only one Fundamental Data Type (the int field)
     but gains 8-byte alignment and size thanks to "e".  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 1;

  if (ret2)
    return 2;

  return ret;
}
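/* Editorial sketch, not part of GCC: alignment above one 32-bit word is
   what triggers doubleword handling.  Host-side check with an explicitly
   aligned struct, kept under #if 0 so it is never compiled.  */
#if 0
#include <assert.h>

struct __attribute__ ((aligned (8))) dw { int x; };

int
main (void)
{
  const int parm_boundary = 32;		/* bits; stand-in value  */
  assert (_Alignof (struct dw) * 8 > parm_boundary);	/* needs pairing  */
  assert (_Alignof (int) * 8 <= parm_boundary);		/* does not  */
  return 0;
}
#endif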
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}

/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      else if (TARGET_VFP_BASE)
	warning (OPT_Wattributes, "FP registers might be clobbered despite "
		 "%qE attribute: compile with %<-mgeneral-regs-only%>",
		 name);
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (FUNC_OR_METHOD_TYPE_P (*node))
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    warning (OPT_Wattributes, "%qE attribute ignored",
		     name);
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE;
  tree fntype, type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  if (DECL_P (*node))
    {
      fntype = TREE_TYPE (*node);

      if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
	decl = *node;
    }
  else
    fntype = *node;

  while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  if (decl)
    {
      type = build_distinct_type_copy (TREE_TYPE (decl));
      TREE_TYPE (decl) = type;
    }
  else
    {
      type = build_distinct_type_copy (*node);
      *node = type;
    }

  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  tree attrs1 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type1));
  tree attrs2 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type2));
  if (bool (attrs1) != bool (attrs2))
    return 0;
  if (attrs1 && !attribute_value_equal (attrs1, attrs2))
    return 0;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (FUNC_OR_METHOD_TYPE_P (type))
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */
bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  if (TARGET_FDPIC)
    {
      /* In FDPIC, never tailcall something for which we have no decl:
	 the target function could be in a different module, requiring
	 a different FDPIC register value.  */
      if (decl == NULL)
	return false;
    }

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    {
	      function_arg_info arg (type, /*named=*/true);
	      arm_function_arg_advance (cum_v, arg);
	    }
	}

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
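/* Illustrative note (added commentary): when every check above passes, a
   call in tail position, e.g.

     int g (int);
     int f (int x) { return g (x); }

   can be emitted as a direct branch "b g" that reuses the caller's frame,
   instead of "bl g" followed by a separate return.  */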
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (SYMBOL_REF_P (x)
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
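/* Illustrative note (added commentary): under PIC this rejects immediates
   such as (symbol_ref "x") or (const (plus (symbol_ref "x") (const_int 4)));
   those must instead go through legitimize_pic_address below so the access
   becomes GOT- or PC-relative.  */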
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already
   done so.  COMPUTE_NOW decides whether and where to set the PIC register.
   If true, the PIC register is reloaded at the current position in the
   instruction stream regardless of whether it was loaded before.  Otherwise,
   it is only loaded if not already done so (crtl->uses_pic_offset_table is
   null).  Note that a nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and a null PIC_REG is only supported iff COMPUTE_NOW is false.  */
static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !compute_now
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}
/* Generate insns to calculate the address of ORIG in pic mode.  */

static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by the
   register allocator.  COMPUTE_NOW forces the PIC register to be loaded at
   the current location in the instruction stream, regardless of whether it
   was loaded previously.  Note that a nonnull PIC_REG is only supported iff
   COMPUTE_NOW is true and a null PIC_REG is only supported iff COMPUTE_NOW
   is false.

   Returns the register REG into which the PIC load is performed.  */
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
			bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (SYMBOL_REF_P (orig)
      || LABEL_REF_P (orig))
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((LABEL_REF_P (orig)
	   || (SYMBOL_REF_P (orig)
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
	       && (!SYMBOL_REF_FUNCTION_P (orig)
		   || arm_fdpic_local_funcdesc_p (orig))))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register (pic_reg, compute_now);

	  if (pic_reg == NULL_RTX)
	    pic_reg = cfun->machine->pic_reg;

	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
				     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg, pic_reg,
				       compute_now);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Generate insns that produce the address of the stack canary.  */

static rtx
arm_stack_protect_tls_canary_mem (bool reload)
{
  rtx tp = gen_reg_rtx (SImode);
  if (reload)
    emit_insn (gen_reload_tp_hard (tp));
  else
    emit_insn (gen_load_tp_hard (tp));

  rtx reg = gen_reg_rtx (SImode);
  rtx offset = GEN_INT (arm_stack_protector_guard_offset);
  emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
  return gen_rtx_MEM (SImode, reg);
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Find a spare register to use during the prolog of a function.  */

static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	{
	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
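/* Illustrative example (added commentary): in ARM state the generic path
   above typically expands to a sequence of the shape

	ldr	rPIC, .LCn
     .LPICm:
	add	rPIC, pc, rPIC	@ pc reads as .LPICm + 8 here
	...
     .LCn:
	.word	_GLOBAL_OFFSET_TABLE_-(.LPICm+8)

   which is why the UNSPEC offset built above is biased by 8 in ARM state
   and by 4 in Thumb state.  */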
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (LABEL_REF_P (orig))
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = VAR_P (decl)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = VAR_P (decl)
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}
/* Generate code to load the address of a static var when flag_pic is set.  */

static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;
  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  /* If we are dealing with a MVE predicate mode, then treat it as a HImode as
     we can store and load it like any other 16-bit value.  */
  if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
    mode = HImode;

  if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
    return mve_vector_mem_operand (mode, x, strict_p);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode
	   || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
	   || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_VFP_BASE
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && SYMBOL_REF_P (x)
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
	   && !arm_disable_literal_pool)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (LABEL_REF_P (x)
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || VIRTUAL_REGISTER_P (XEXP (x, 0)))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && SYMBOL_REF_P (x)
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && !arm_disable_literal_pool
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
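/* Illustrative summary (added commentary) of the ranges accepted above,
   matching the scaled 5-bit immediate of the Thumb-1 load/store encodings:

     mode size 1 (ldrb/strb): offsets 0..31
     mode size 2 (ldrh/strh): even offsets 0..62
     larger     (ldr/str)   : word-aligned offsets with val + size <= 128,
			      i.e. 0..124 for SImode.  */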
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  /* Restore r9.  */
	  emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
							  initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (!SYMBOL_REF_P (x))
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (TARGET_THUMB1)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
	 only use a 8-bit index.  So let's use a 12-bit index for
	 SImode only and hope that arm_gen_constant will enable LDRB
	 to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the
	     base with more bits set and use a negative index instead.
	     Don't do this for Thumb as negative offsets are much more
	     limited.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
	  /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
	     we build the symbol address with upper/lower
	     relocations.  */
	  || (TARGET_THUMB1
	      && !label_mentioned_p (x)
	      && arm_valid_symbolic_address_p (x)
	      && arm_disable_literal_pool)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))

static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
        return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256
              /* 16-bit constant.  */
              || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return arm_disable_literal_pool
            ? COSTS_N_INSNS (8)
            : COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
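
/* A worked example of the MEM cost formula above (illustrative numbers,
   not a statement about any particular core): a DImode load costs
   10 + 4 * ((8 - 1) / UNITS_PER_WORD) = 10 + 4 = 14, and a further 4 is
   added when the address is a constant-pool reference, reflecting the
   cost of materializing the literal.  */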

/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
         defined by RTL expansion, especially for the expansion of
         multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
           && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
          || (GET_CODE (XEXP (x, 1)) == MULT
              && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
        return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* Thumb1 mul instruction can't operate on const.  We must load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          /* For the targets which have a very small and high-latency multiply
             unit, we prefer to synthesize the mult with up to 5 instructions,
             giving a good balance between size and performance.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            return COSTS_N_INSNS (5);
          else
            return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
          || satisfies_constraint_K (SET_SRC (x))
          /* Too big an immediate for a 2-byte mov, using MOVT.  */
          || (CONST_INT_P (SET_SRC (x))
              && UINTVAL (SET_SRC (x)) >= 256
              && TARGET_HAVE_MOVT
              && satisfies_constraint_j (SET_SRC (x)))
          /* thumb1_movdi_insn.  */
          || ((words > 1) && MEM_P (SET_SRC (x))))
        cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* movw is 4 bytes long.  */
          if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return arm_disable_literal_pool
            ? COSTS_N_INSNS (8)
            : COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
              + COSTS_N_INSNS (1)
                * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
                 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case E_QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case E_HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case E_SImode:
          return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}

/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}

/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}

static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
         use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
                  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
                  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
      return true;

    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}

/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
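
/* For example, a two-argument libcall is costed as
   LIBCALL_COST (2) = COSTS_N_INSNS (20) when optimizing for speed and
   COSTS_N_INSNS (4) at -Os, so libcalls are strongly disfavoured in
   hot code.  */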

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)                           \
  do                                                                 \
    {                                                                \
      shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);           \
      if (shift_op != NULL                                           \
          && arm_rtx_shift_left_p (XEXP (x, IDX)))                   \
        {                                                            \
          if (shift_reg)                                             \
            {                                                        \
              if (speed_p)                                           \
                *cost += extra_cost->alu.arith_shift_reg;            \
              *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),    \
                                 ASHIFT, 1, speed_p);                \
            }                                                        \
          else if (speed_p)                                          \
            *cost += extra_cost->alu.arith_shift;                    \
                                                                     \
          *cost += (rtx_cost (shift_op, GET_MODE (shift_op),         \
                              ASHIFT, 0, speed_p)                    \
                    + rtx_cost (XEXP (x, 1 - IDX),                   \
                                GET_MODE (shift_op),                 \
                                OP, 1, speed_p));                    \
          return true;                                               \
        }                                                            \
    }                                                                \
  while (0)

/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
               int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
        {
        default:
        case REG:
          op_type = AMO_DEFAULT;
          break;
        case MINUS:
          /* MINUS does not appear in RTL, but the architecture supports it,
             so handle this case defensively.  */
          /* fall through */
        case PLUS:
          op_type = AMO_NO_WB;
          break;
        case PRE_INC:
        case PRE_DEC:
        case POST_INC:
        case POST_DEC:
        case PRE_MODIFY:
        case POST_MODIFY:
          op_type = AMO_WB;
          break;
        }

      if (VECTOR_MODE_P (mode))
        *cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
        *cost += current_tune->addr_mode_costs->fp[op_type];
      else
        *cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
        {
          if (GET_MODE_SIZE (mode) == 8)
            *cost += extra_cost->ldst.loadd;
          else
            *cost += extra_cost->ldst.loadf;
        }
      else if (VECTOR_MODE_P (mode))
        *cost += extra_cost->ldst.loadv;
      else
        {
          /* Integer modes */
          if (GET_MODE_SIZE (mode) == 8)
            *cost += extra_cost->ldst.ldrd;
          else
            *cost += extra_cost->ldst.load;
        }
    }

  return true;
}
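
/* As a sketch of how the two components combine (the actual numbers come
   from the current tuning tables): a DFmode load through a writeback
   address, costed for speed, yields
     COSTS_N_INSNS (1) + addr_mode_costs->fp[AMO_WB] + extra_cost->ldst.loadd,
   i.e. one base insn plus the addressing-mode and access adjustments.  */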

/* Helper for arm_bfi_p.  */
static bool
arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
{
  unsigned HOST_WIDE_INT const1;
  unsigned HOST_WIDE_INT const2 = 0;

  if (!CONST_INT_P (XEXP (op0, 1)))
    return false;

  const1 = UINTVAL (XEXP (op0, 1));
  if (!CONST_INT_P (XEXP (op1, 1))
      || ~UINTVAL (XEXP (op1, 1)) != const1)
    return false;

  if (GET_CODE (XEXP (op0, 0)) == ASHIFT
      && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
    {
      const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
      *sub0 = XEXP (XEXP (op0, 0), 0);
    }
  else
    *sub0 = XEXP (op0, 0);

  if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return false;

  *sub1 = XEXP (op1, 0);
  return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
}

/* Recognize a BFI idiom.  Helper for arm_rtx_costs_internal.  The
   format looks something like:

   (IOR (AND (reg1) (~const1))
        (AND (ASHIFT (reg2) (const2))
             (const1)))

   where const1 is a consecutive sequence of 1-bits with the
   least-significant non-zero bit starting at bit position const2.  If
   const2 is zero, then the shift will not appear at all, due to
   canonicalization.  The two arms of the IOR expression may be
   flipped.  */
static bool
arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
{
  if (GET_CODE (x) != IOR)
    return false;
  if (GET_CODE (XEXP (x, 0)) != AND
      || GET_CODE (XEXP (x, 1)) != AND)
    return false;
  return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
          || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
}
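
/* A concrete instance of the idiom above: with const1 == 0xff0 (eight
   consecutive ones starting at bit 4) and const2 == 4,
   exact_log2 (0xff0 + (1 << 4)) == exact_log2 (0x1000) == 12 >= 0, so
   the IOR/AND pair is recognized as a BFI of reg2 into bits 4..11 of
   reg1.  */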

/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
                        const struct cpu_cost_table *extra_cost,
                        int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
        *cost = thumb1_rtx_costs (x, code, outer_code);
      else
        *cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
          && REG_P (SET_DEST (x)))
        {
          /* Assume that most copies can be done with a single insn,
             unless we don't have HW FP, in which case everything
             larger than word mode will require two insns.  */
          *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
                                   && GET_MODE_SIZE (mode) > 4)
                                  || mode == DImode)
                                 ? 2 : 1);
          /* Conditional register moves can be encoded
             in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
            *cost >>= 1;

          return true;
        }

      if (CONST_INT_P (SET_SRC (x)))
        {
          /* Handle CONST_INT here, since the value doesn't have a mode
             and we would otherwise be unable to work out the true cost.  */
          *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
                            0, speed_p);
          outer_code = SET;
          /* Slightly lower the cost of setting a core reg to a constant.
             This helps break up chains and allows for better scheduling.  */
          if (REG_P (SET_DEST (x))
              && REGNO (SET_DEST (x)) <= LR_REGNUM)
            *cost -= 1;
          x = SET_SRC (x);
          /* Immediate moves with an immediate in the range [0, 255] can be
             encoded in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
              && INTVAL (x) >= 0 && INTVAL (x) <= 255)
            *cost >>= 1;

          goto const_int_cost;
        }

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);

    case PARALLEL:
      {
        /* Calculations of LDM costs are complex.  We assume an initial cost
           (ldm_1st) which will load the number of registers mentioned in
           ldm_regs_per_insn_1st registers; then each additional
           ldm_regs_per_insn_subsequent registers cost one more insn.  The
           formula for N regs is thus:

           ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
                                     + ldm_regs_per_insn_subsequent - 1)
                                    / ldm_regs_per_insn_subsequent).

           Additional costs may also be added for addressing.  A similar
           formula is used for STM.  */

        bool is_ldm = load_multiple_operation (x, SImode);
        bool is_stm = store_multiple_operation (x, SImode);

        if (is_ldm || is_stm)
          {
            if (speed_p)
              {
                HOST_WIDE_INT nregs = XVECLEN (x, 0);
                HOST_WIDE_INT regs_per_insn_1st = is_ldm
                  ? extra_cost->ldst.ldm_regs_per_insn_1st
                  : extra_cost->ldst.stm_regs_per_insn_1st;
                HOST_WIDE_INT regs_per_insn_sub = is_ldm
                  ? extra_cost->ldst.ldm_regs_per_insn_subsequent
                  : extra_cost->ldst.stm_regs_per_insn_subsequent;

                *cost += regs_per_insn_1st
                         + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
                                           + regs_per_insn_sub - 1)
                                          / regs_per_insn_sub);
                return true;
              }
          }
        return false;
      }
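
    /* A worked example of the LDM/STM formula above, with illustrative
       tuning numbers (not taken from any particular cost table): if
       ldm_regs_per_insn_1st is 2 and ldm_regs_per_insn_subsequent is 2,
       an LDM of 5 registers costs
         ldm_1st + COSTS_N_INSNS ((max (5 - 2, 0) + 2 - 1) / 2)
       = ldm_1st + COSTS_N_INSNS (2):
       the first insn covers two registers and the remaining three need
       two more insns.  */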

    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *cost += COSTS_N_INSNS (speed_p
                                ? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
        *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
        *cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
         possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;  /* All arguments must be in registers.  */

    case MOD:
      /* MOD by a power of 2 can be expanded as:
         rsbs    r1, r0, #0
         and     r0, r0, #(n - 1)
         and     r1, r1, #(n - 1)
         rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
          && exact_log2 (INTVAL (XEXP (x, 1))) > 0
          && mode == SImode)
        {
          *cost += COSTS_N_INSNS (3);

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical
                     + extra_cost->alu.arith;
          return true;
        }

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
         possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;  /* All arguments must be in registers.  */
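
    /* For the power-of-2 MOD expansion above, a concrete example: x % 8
       uses n == 8, so both AND instructions mask with #(8 - 1) == 7 and
       the conditional RSB fixes the sign of the result; hence the fixed
       COSTS_N_INSNS (3) added on top of the base cost of one insn.  */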

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
        {
          *cost += (COSTS_N_INSNS (1)
                    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
          if (speed_p)
            *cost += extra_cost->alu.shift_reg;
          return true;
        }
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
          *cost += (COSTS_N_INSNS (2)
                    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
          /* Slightly disparage left shift by 1 so we prefer adddi3.  */
          if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
            *cost += 1;
          return true;
        }
      else if (mode == SImode)
        {
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
          /* Slightly disparage register shifts at -Os, but not by much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_SIZE (mode) < 4)
        {
          if (code == ASHIFT)
            {
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              /* Slightly disparage register shifts at -Os, but not by
                 much.  */
              if (!CONST_INT_P (XEXP (x, 1)))
                *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                          + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
            }
          else if (code == LSHIFTRT || code == ASHIFTRT)
            {
              if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
                {
                  /* Can use SBFX/UBFX.  */
                  if (speed_p)
                    *cost += extra_cost->alu.bfx;
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                }
              else
                {
                  *cost += COSTS_N_INSNS (1);
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                  if (speed_p)
                    {
                      if (CONST_INT_P (XEXP (x, 1)))
                        *cost += 2 * extra_cost->alu.shift;
                      else
                        *cost += (extra_cost->alu.shift
                                  + extra_cost->alu.shift_reg);
                    }
                  else
                    /* Slightly disparage register shifts.  */
                    *cost += !CONST_INT_P (XEXP (x, 1));
                }
            }
          else /* Rotates.  */
            {
              *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              if (speed_p)
                {
                  if (CONST_INT_P (XEXP (x, 1)))
                    *cost += (2 * extra_cost->alu.shift
                              + extra_cost->alu.log_shift);
                  else
                    *cost += (extra_cost->alu.shift
                              + extra_cost->alu.shift_reg
                              + extra_cost->alu.log_shift_reg);
                }
            }
          return true;
        }

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
        {
          if (mode == SImode)
            {
              if (speed_p)
                *cost += extra_cost->alu.rev;

              return false;
            }
        }
      else
        {
          /* No rev instruction available.  Look at arm_legacy_rev
             and thumb_legacy_rev for the form of RTL used then.  */
          if (TARGET_THUMB)
            {
              *cost += COSTS_N_INSNS (9);

              if (speed_p)
                {
                  *cost += 6 * extra_cost->alu.shift;
                  *cost += 3 * extra_cost->alu.logical;
                }
            }
          else
            {
              *cost += COSTS_N_INSNS (4);

              if (speed_p)
                {
                  *cost += 2 * extra_cost->alu.shift;
                  *cost += extra_cost->alu.arith_shift;
                  *cost += 2 * extra_cost->alu.logical;
                }
            }
          return true;
        }
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT
              || GET_CODE (XEXP (x, 1)) == MULT)
            {
              rtx mul_op0, mul_op1, sub_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              if (GET_CODE (XEXP (x, 0)) == MULT)
                {
                  mul_op0 = XEXP (XEXP (x, 0), 0);
                  mul_op1 = XEXP (XEXP (x, 0), 1);
                  sub_op = XEXP (x, 1);
                }
              else
                {
                  mul_op0 = XEXP (XEXP (x, 1), 0);
                  mul_op1 = XEXP (XEXP (x, 1), 1);
                  sub_op = XEXP (x, 0);
                }

              /* The first operand of the multiply may be optionally
                 negated.  */
              if (GET_CODE (mul_op0) == NEG)
                mul_op0 = XEXP (mul_op0, 0);

              *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
                        + rtx_cost (mul_op1, mode, code, 0, speed_p)
                        + rtx_cost (sub_op, mode, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }

      if (mode == SImode)
        {
          rtx shift_by_reg = NULL;
          rtx shift_op;
          rtx non_shift_op;
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);

          /* Factor out any borrow operation.  There's more than one way
             of expressing this; try to recognize them all.  */
          if (GET_CODE (op0) == MINUS)
            {
              if (arm_borrow_operation (op1, SImode))
                {
                  op1 = XEXP (op0, 1);
                  op0 = XEXP (op0, 0);
                }
              else if (arm_borrow_operation (XEXP (op0, 1), SImode))
                op0 = XEXP (op0, 0);
            }
          else if (GET_CODE (op1) == PLUS
                   && arm_borrow_operation (XEXP (op1, 0), SImode))
            op1 = XEXP (op1, 0);
          else if (GET_CODE (op0) == NEG
                   && arm_borrow_operation (op1, SImode))
            {
              /* Negate with carry-in.  For Thumb2 this is done with
                 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
                 RSC instruction that exists in Arm mode.  */
              if (speed_p)
                *cost += (TARGET_THUMB2
                          ? extra_cost->alu.arith_shift
                          : extra_cost->alu.arith);
              *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
              return true;
            }
          /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
             Note we do mean ~borrow here.  */
          else if (TARGET_ARM && arm_carry_operation (op0, SImode))
            {
              *cost += rtx_cost (op1, mode, code, 1, speed_p);
              return true;
            }

          shift_op = shifter_op_p (op0, &shift_by_reg);
          if (shift_op == NULL)
            {
              shift_op = shifter_op_p (op1, &shift_by_reg);
              non_shift_op = op0;
            }
          else
            non_shift_op = op1;

          if (shift_op != NULL)
            {
              if (shift_by_reg != NULL)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
              *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
              return true;
            }

          if (arm_arch_thumb2
              && GET_CODE (XEXP (x, 1)) == MULT)
            {
              /* MLS.  */
              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
              return true;
            }

          if (CONST_INT_P (op0))
            {
              int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
                                            INTVAL (op0), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
              return true;
            }
          else if (speed_p)
            *cost += extra_cost->alu.arith;

          /* Don't recurse as we don't want to cost any borrow that
             we've stripped.  */
          *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
          *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
          return true;
        }

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          /* We check both sides of the MINUS for shifter operands since,
             unlike PLUS, it's not commutative.  */

          HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
          HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

          /* Slightly disparage, as we might need to widen the result.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;

          if (CONST_INT_P (XEXP (x, 0)))
            {
              *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
              return true;
            }

          return false;
        }

      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
            {
              rtx op1 = XEXP (x, 1);

              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;

              if (GET_CODE (op1) == ZERO_EXTEND)
                *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
                                   0, speed_p);
              else
                *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
                                 0, speed_p);
              return true;
            }
          else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
              return true;
            }
          else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));
              *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
                                    GET_CODE (XEXP (x, 1)), 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              rtx mul_op0, mul_op1, add_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              mul_op0 = XEXP (XEXP (x, 0), 0);
              mul_op1 = XEXP (XEXP (x, 0), 1);
              add_op = XEXP (x, 1);

              *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
                        + rtx_cost (mul_op1, mode, code, 0, speed_p)
                        + rtx_cost (add_op, mode, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }

      /* Narrow modes can be synthesized in SImode, but the range
         of useful sub-operations is limited.  Check for shift operations
         on one of the operands.  Only left shifts can be used in the
         narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              /* Slightly penalize a narrow operation as the result may
                 need widening.  */
              *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
              return true;
            }

          /* Slightly penalize a narrow operation as the result may
             need widening.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }

      if (mode == SImode)
        {
          rtx shift_op, shift_reg;

          if (TARGET_INT_SIMD
              && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
            {
              /* UXTA[BH] or SXTA[BH].  */
              if (speed_p)
                *cost += extra_cost->alu.extend_arith;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
              return true;
            }

          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);

          /* Handle a side effect of adding in the carry to an addition.  */
          if (GET_CODE (op0) == PLUS
              && arm_carry_operation (op1, mode))
            {
              op1 = XEXP (op0, 1);
              op0 = XEXP (op0, 0);
            }
          else if (GET_CODE (op1) == PLUS
                   && arm_carry_operation (op0, mode))
            {
              op0 = XEXP (op1, 0);
              op1 = XEXP (op1, 1);
            }
          else if (GET_CODE (op0) == PLUS)
            {
              op0 = strip_carry_operation (op0);
              if (swap_commutative_operands_p (op0, op1))
                std::swap (op0, op1);
            }

          if (arm_carry_operation (op0, mode))
            {
              /* Adding the carry to a register is a canonicalization of
                 adding 0 to the register plus the carry.  */
              if (speed_p)
                *cost += extra_cost->alu.arith;
              *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
              return true;
            }

          shift_reg = NULL;
          shift_op = shifter_op_p (op0, &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
                        + rtx_cost (op1, mode, PLUS, 1, speed_p));
              return true;
            }

          if (GET_CODE (op0) == MULT)
            {
              rtx mul_op = op0;

              if (TARGET_DSP_MULTIPLY
                  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
                       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                           || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                               && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                               && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
                      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
                          && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
                          && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
                          && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                              || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                                  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                                  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
                                      == 16))))))
                {
                  /* SMLA[BT][BT].  */
                  if (speed_p)
                    *cost += extra_cost->mult[0].extend_add;
                  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
                                      SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
                                        SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (op1, mode, PLUS, 1, speed_p));
                  return true;
                }

              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
                        + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
                        + rtx_cost (op1, mode, PLUS, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (op1))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (op1), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.arith;

          /* Don't recurse here because we want to test the operands
             without any carry operation.  */
          *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
          *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
          return true;
        }

      if (mode == DImode)
        {
          if (GET_CODE (XEXP (x, 0)) == MULT
              && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
                  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
                      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
            {
              if (speed_p)
                *cost += extra_cost->mult[1].extend_add;
              *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
                                    ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          *cost += COSTS_N_INSNS (1);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));

              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case IOR:
      {
        rtx sub0, sub1;
        if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
          {
            if (speed_p)
              *cost += extra_cost->alu.rev;

            return true;
          }
        else if (mode == SImode && arm_arch_thumb2
                 && arm_bfi_p (x, &sub0, &sub1))
          {
            *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
            *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
            if (speed_p)
              *cost += extra_cost->alu.bfi;

            return true;
          }
      }

      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));
          rtx op0 = XEXP (x, 0);
          rtx shift_op, shift_reg;

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          shift_reg = NULL;
          shift_op = shifter_op_p (op0, &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;

              *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (code, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);

              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.logical;
              *cost += rtx_cost (op0, mode, code, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          *cost += (rtx_cost (op0, mode, code, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
          return true;
        }

      if (mode == DImode)
        {
          rtx op0 = XEXP (x, 0);
          enum rtx_code subcode = GET_CODE (op0);

          *cost += COSTS_N_INSNS (1);

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          if (GET_CODE (op0) == ZERO_EXTEND)
            {
              if (speed_p)
                *cost += 2 * extra_cost->alu.logical;

              *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
              return true;
            }
          else if (GET_CODE (op0) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

              *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical;

          return true;
        }
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          rtx op0 = XEXP (x, 0);

          if (GET_CODE (op0) == NEG && !flag_rounding_math)
            op0 = XEXP (op0, 0);

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].mult;

          *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }

      if (mode == SImode)
        {
          if (TARGET_DSP_MULTIPLY
              && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                           && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
                  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
                      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
                      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
                      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                          || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                              && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                              && (INTVAL (XEXP (XEXP (x, 1), 1))
                                  == 16))))))
            {
              /* SMUL[TB][TB].  */
              if (speed_p)
                *cost += extra_cost->mult[0].extend;
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
                                 SIGN_EXTEND, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
                                 SIGN_EXTEND, 1, speed_p);
              return true;
            }
          if (speed_p)
            *cost += extra_cost->mult[0].simple;
          return false;
        }

      if (mode == DImode)
        {
          if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
            {
              if (speed_p)
                *cost += extra_cost->mult[1].extend;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
                                    ZERO_EXTEND, 0, speed_p));
              return true;
            }

          *cost = LIBCALL_COST (2);
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              /* VNMUL.  */
              *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;

          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
          return false;
        }

      if (mode == SImode)
        {
          if (GET_CODE (XEXP (x, 0)) == ABS)
            {
              *cost += COSTS_N_INSNS (1);
              /* Assume the non-flag-changing variant.  */
              if (speed_p)
                *cost += (extra_cost->alu.log_shift
                          + extra_cost->alu.arith_shift);
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
              return true;
            }

          if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
            {
              *cost += COSTS_N_INSNS (1);
              /* No extra cost for MOV imm and MVN imm.  */
              /* If the comparison op is using the flags, there's no further
                 cost, otherwise we need to add the cost of the comparison.  */
              if (!(REG_P (XEXP (XEXP (x, 0), 0))
                    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
                    && XEXP (XEXP (x, 0), 1) == const0_rtx))
                {
                  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
                  *cost += (COSTS_N_INSNS (1)
                            + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
                                        0, speed_p)
                            + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
                                        1, speed_p));
                  if (speed_p)
                    *cost += extra_cost->alu.arith;
                }
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.arith;
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          /* Slightly disparage, as we might need an extend operation.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;
          return false;
        }

      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case NOT:
      if (mode == SImode)
        {
          rtx shift_op;
          rtx shift_reg = NULL;

          shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

          if (shift_op)
            {
              if (shift_reg != NULL)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;
              *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          return false;
        }
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          return false;
        }

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;

    case IF_THEN_ELSE:
      {
        if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
          {
            *cost += COSTS_N_INSNS (3);
            return true;
          }
        int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
        int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

        *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
        /* Assume that if one arm of the if_then_else is a register,
           that it will be tied with the result and eliminate the
           conditional insn.  */
        if (REG_P (XEXP (x, 1)))
          *cost += op2cost;
        else if (REG_P (XEXP (x, 2)))
          *cost += op1cost;
        else
          {
            if (speed_p)
              {
                if (extra_cost->alu.non_exec_costs_exec)
                  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
                else
                  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
              }
            else
              *cost += op1cost + op2cost;
          }
      }
      return true;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
        *cost = 0;
      else
        {
          machine_mode op0mode;
          /* We'll mostly assume that the cost of a compare is the cost of the
             LHS.  However, there are some notable exceptions.  */

          /* Floating point compares are never done as side-effects.  */
          op0mode = GET_MODE (XEXP (x, 0));
          if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
              && (op0mode == SFmode || !TARGET_VFP_SINGLE))
            {
              if (speed_p)
                *cost += extra_cost->fp[op0mode != SFmode].compare;

              if (XEXP (x, 1) == CONST0_RTX (op0mode))
                {
                  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
                  return true;
                }

              return false;
            }
          else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
            {
              *cost = LIBCALL_COST (2);
              return false;
            }

          /* DImode compares normally take two insns.  */
          if (op0mode == DImode)
            {
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;
              return false;
            }

          if (op0mode == SImode)
            {
              rtx shift_op;
              rtx shift_reg;

              if (XEXP (x, 1) == const0_rtx
                  && !(REG_P (XEXP (x, 0))
                       || (GET_CODE (XEXP (x, 0)) == SUBREG
                           && REG_P (SUBREG_REG (XEXP (x, 0))))))
                {
                  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

                  /* Multiply operations that set the flags are often
                     significantly more expensive.  */
                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;

                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == PLUS
                      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
                                                            0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;
                  return true;
                }

              shift_reg = NULL;
              shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
              if (shift_op != NULL)
                {
                  if (shift_reg != NULL)
                    {
                      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
                                         1, speed_p);
                      if (speed_p)
                        *cost += extra_cost->alu.arith_shift_reg;
                    }
                  else if (speed_p)
                    *cost += extra_cost->alu.arith_shift;
                  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
                  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
                  return true;
                }

              if (speed_p)
                *cost += extra_cost->alu.arith;
              if (CONST_INT_P (XEXP (x, 1))
                  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
                {
                  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
                  return true;
                }
              return false;
            }

          /* Vector mode?  */

          *cost = LIBCALL_COST (2);
          return false;
        }
      return true;

    case EQ:
    case GE:
    case GT:
    case LE:
    case LT:
      /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
         vcle and vclt).  */
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && (XEXP (x, 1) == CONST0_RTX (mode)))
        {
          *cost = 0;
          return true;
        }

      /* Fall through.  */
    case NE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
        {
          /* Is it a store-flag operation?  */
          if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
              && XEXP (x, 1) == const0_rtx)
            {
              /* Thumb also needs an IT insn.  */
              *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
              return true;
            }
          if (XEXP (x, 1) == const0_rtx)
            {
              switch (code)
                {
                case LT:
                  /* LSR Rd, Rn, #31.  */
                  if (speed_p)
                    *cost += extra_cost->alu.shift;
                  break;

                case EQ:
                  /* RSBS T1, Rn, #0
                     ADC  Rd, Rn, T1.  */

                case NE:
                  /* SUBS T1, Rn, #1
                     SBC  Rd, Rn, T1.  */
                  *cost += COSTS_N_INSNS (1);
                  break;

                case LE:
                  /* RSBS T1, Rn, Rn, LSR #31
                     ADC  Rd, Rn, T1.  */
                  *cost += COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift;
                  break;

                case GT:
                  /* RSB  Rd, Rn, Rn, ASR #1
                     LSR  Rd, Rd, #31.  */
                  *cost += COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += (extra_cost->alu.arith_shift
                              + extra_cost->alu.shift);
                  break;

                case GE:
                  /* ASR  Rd, Rn, #31
                     ADD  Rd, Rn, #1.  */
                  *cost += COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += extra_cost->alu.shift;
                  break;

                default:
                  /* Remaining cases are either meaningless or would take
                     three insns anyway.  */
                  *cost = COSTS_N_INSNS (3);
                  break;
                }
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              return true;
            }
          else
            {
              *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
              if (CONST_INT_P (XEXP (x, 1))
                  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
                {
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                  return true;
                }

              return false;
            }
        }
      /* Not directly inside a set.  If it involves the condition code
         register it must be the condition for a branch, cond_exec or
         I_T_E operation.  Since the comparison is performed elsewhere
         this is just the control part which has no additional
         cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
               && XEXP (x, 1) == const0_rtx)
        {
          *cost = 0;
          return true;
        }
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;

          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
          return false;
        }

      if (mode == SImode)
        {
          if (speed_p)
            *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
          return false;
        }
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
          && MEM_P (XEXP (x, 0)))
        {
          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);

          if (!speed_p)
            return true;

          if (GET_MODE (XEXP (x, 0)) == SImode)
            *cost += extra_cost->ldst.load;
          else
            *cost += extra_cost->ldst.load_sign_extend;

          if (mode == DImode)
            *cost += extra_cost->alu.shift;

          return true;
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have SXTB/SXTH.  */
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extend;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  */
          *cost += COSTS_N_INSNS (1);
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.shift;
        }

      return true;

    case ZERO_EXTEND:
      if ((arm_arch4
           || GET_MODE (XEXP (x, 0)) == SImode
           || GET_MODE (XEXP (x, 0)) == QImode)
          && MEM_P (XEXP (x, 0)))
        {
          *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

          return true;
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
        {
          /* UXTB can be a shorter instruction in Thumb2, but it might
             be slower than the AND Rd, Rn, #255 alternative.  When
             optimizing for speed it should never be slower to use
             AND, and we don't really model 16-bit vs 32-bit insns
             here.  */
          if (speed_p)
            *cost += extra_cost->alu.logical;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have UXTB/UXTH.  */
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extend;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  It's marginally preferable to use
             shifts rather than two BIC instructions as the second
             shift may merge with a subsequent insn as a shifter
             op.  */
          *cost = COSTS_N_INSNS (2);
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

      return true;

    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
         insns are really going to be needed.  The best we can do is
         look at the value passed.  If it fits in SImode, then assume
         that's the mode it will be used for.  Otherwise assume it
         will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
        mode = SImode;
      else
        mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
            || outer_code == AND
            || outer_code == IOR
            || outer_code == XOR
            || outer_code == MINUS))
        outer_code = SET;

    const_int_cost:
      if (mode == SImode)
        {
          *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
                                                    INTVAL (x), NULL, NULL,
                                                    0, 0));
          /* Extra costs?  */
        }
      else
        {
          *cost += COSTS_N_INSNS (arm_gen_constant
                                  (outer_code, SImode, NULL,
                                   trunc_int_for_mode (INTVAL (x), SImode),
                                   NULL, NULL, 0, 0)
                                  + arm_gen_constant (outer_code, SImode, NULL,
                                                      INTVAL (x) >> 32, NULL,
                                                      NULL, 0, 0));
          /* Extra costs?  */
        }

      return true;
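
    /* For instance (illustrative only): 0x0000ff00 is a valid rotated
       immediate, so arm_gen_constant reports a single insn, while a
       value such as 0x12345678 has to be synthesized from several
       steps and is costed proportionally higher.  */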

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
        {
          if (arm_arch_thumb2 && !flag_pic)
            *cost += COSTS_N_INSNS (1);
          else
            *cost += extra_cost->ldst.load;
        }
      else
        *cost += COSTS_N_INSNS (1);

      if (flag_pic)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
        }

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (vfp3_const_double_rtx (x))
            {
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].fpconst;
              return true;
            }

          if (speed_p)
            {
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            *cost += COSTS_N_INSNS (1 + (mode == DFmode));

          return true;
        }
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      /* Fixme.  */
      if (((TARGET_NEON && TARGET_HARD_FLOAT
            && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
           || TARGET_HAVE_MVE)
          && simd_immediate_valid_for_move (x, mode, NULL, NULL))
        *cost = COSTS_N_INSNS (1);
      else
        *cost = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
         MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
        *cost += 1;
      return true;

    case CLZ:
      if (speed_p)
        *cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
        {
          if (speed_p)
            *cost += extra_cost->alu.log_shift;
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
          return true;
        }
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;

    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
                      == ZERO_EXTEND))))
        {
          if (speed_p)
            *cost += extra_cost->mult[1].extend;
          *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
                              ZERO_EXTEND, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
                                ZERO_EXTEND, 0, speed_p));
          return true;
        }
      *cost = LIBCALL_COST (1);
      return false;

    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
         is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
          && mode == SImode
          && CONST_INT_P (XEXP (x, 1))
          && CONST_INT_P (XEXP (x, 2)))
        {
          if (speed_p)
            *cost += extra_cost->alu.bfx;
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
          return true;
        }
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
        *cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;

    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
        {
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].widen;
          if (!TARGET_VFP5
              && GET_MODE (XEXP (x, 0)) == HFmode)
            {
              /* Pre v8, widening HF->DF is a two-step process, first
                 widening to SFmode.  */
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[0].widen;
            }
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          return true;
        }

      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
        {
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].narrow;
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          return true;
          /* Vector modes?  */
        }
      *cost = LIBCALL_COST (1);
      return false;

    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          rtx op2 = XEXP (x, 2);

          /* vfms or vfnma.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          /* vfnms or vfnma.  */
          if (GET_CODE (op2) == NEG)
            op2 = XEXP (op2, 0);

          *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
          *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
          *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fma;

          return true;
        }

      *cost = LIBCALL_COST (3);
      return false;

    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
        {
          /* The *combine_vcvtf2i reduces a vmul+vcvt into
             a vcvt fixed-point conversion.  */
          if (code == FIX && mode == SImode
              && GET_CODE (XEXP (x, 0)) == FIX
              && GET_MODE (XEXP (x, 0)) == SFmode
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
                 > 0)
            {
              if (speed_p)
                *cost += extra_cost->fp[0].toint;

              *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
                                 code, 0, speed_p);
              return true;
            }

          if (GET_MODE_CLASS (mode) == MODE_INT)
            {
              mode = GET_MODE (XEXP (x, 0));
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].toint;
              /* Strip off the 'cost' of rounding towards zero.  */
              if (GET_CODE (XEXP (x, 0)) == FIX)
                *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
                                   0, speed_p);
              else
                *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              /* ??? Increase the cost to deal with transferring from
                 FP -> CORE registers?  */
              return true;
            }
          else if (GET_MODE_CLASS (mode) == MODE_FLOAT
                   && TARGET_VFP5)
            {
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].roundint;
              return false;
            }
          /* Vector costs?  */
        }
      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
        {
          /* ??? Increase the cost to deal with transferring from CORE
             -> FP registers?  */
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fromint;
          return false;
        }
      *cost = LIBCALL_COST (1);
      return false;

    case CALL:
      return true;

    case ASM_OPERANDS:
      {
        /* Just a guess.  Guess number of instructions in the asm
           plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
           though (see PR60663).  */
        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

        *cost = COSTS_N_INSNS (asm_length + num_operands);
        return true;
      }

    default:
      if (mode != VOIDmode)
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
               int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
                                   (enum rtx_code) outer_code,
                                   current_tune->insn_extra_cost,
                                   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
    }

  return result;
}

static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  /* Don't cost a simple reg-reg move at a full insn cost: such moves
     will likely disappear during register allocation.  */
  if (!reload_completed
      && GET_CODE (PATTERN (insn)) == SET
      && REG_P (SET_DEST (PATTERN (insn)))
      && REG_P (SET_SRC (PATTERN (insn))))
    return 2;
  cost = pattern_cost (PATTERN (insn), speed);
  /* If the cost is zero, then it's likely a complex insn.  We don't want the
     cost of these to be less than something we know about.  */
  return cost ? cost : COSTS_N_INSNS (2);
}

/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}

/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
                          int *cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
              || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
              || attr_type == TYPE_ALUS_SHIFT_IMM
              || attr_type == TYPE_LOGIC_SHIFT_IMM
              || attr_type == TYPE_LOGICS_SHIFT_IMM
              || attr_type == TYPE_ALU_SHIFT_REG
              || attr_type == TYPE_ALUS_SHIFT_REG
              || attr_type == TYPE_LOGIC_SHIFT_REG
              || attr_type == TYPE_LOGICS_SHIFT_REG
              || attr_type == TYPE_MOV_SHIFT
              || attr_type == TYPE_MVN_SHIFT
              || attr_type == TYPE_MOV_SHIFT_REG
              || attr_type == TYPE_MVN_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints (dep);
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
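
/* An example of the situation handled above (illustrative; the exact
   latencies are XScale-specific): if INSN is "add r0, r1, r2, lsl #2"
   then r2 is the shifted operand, and if DEP is an ALU instruction
   that writes r2, the shifter needs its input earlier than a normal
   operand would be needed, so the dependency cost is bumped to 2.  */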

/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
                             int *cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                  || GET_MODE_CLASS
                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                     s0 = s0 <op> s1
                     s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency and hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (dep_type == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (dep_type == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}

/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
                           int *cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 0;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
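/* Worked example of the Thumb-1 (!TARGET_32BIT) formula above, for
   illustration only: moving a DImode value (GET_MODE_SIZE == 8) costs
   2 * 8 * 1 = 16 for LO_REGS but 2 * 8 * 2 = 32 for any other class,
   while any mode smaller than 4 bytes costs a flat 8.  */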
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
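/* For example (illustrative): building a V4SI vector element by element
   is costed as elements / 2 + 1 = 4 / 2 + 1 = 3 statements, roughly
   reflecting that pairs of elements can be combined before the final
   insert.  */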
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock,
	     *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
}
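/* Illustrative example (not from the original source): if the ready list
   holds, in issue order, { mov r0, r1, lsl #2 (younger-capable),
   add r2, r3, r4 (older-only) }, the add is moved in front of the mov.
   The next cycle can then dual-issue the add (older) alongside the mov
   (younger), instead of the mov occupying the older slot by itself.  */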
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((SYMBOL_REF_P (src_mem)
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = (-1)^s * (n/16) * 2^(4-r)

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).  */
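/* Worked example (illustrative, derived from the encoding described
   above): 1.0 = (16/16) * 2^0, so n = 16 and the stored exponent is
   r = 4.  The encoding is then s = 0, BCD = 4 XOR 3 = 7 (binary 111),
   EFGH = 16 - 16 = 0, giving the 8-bit immediate 0111 0000 = 0x70,
   the value used for "vmov.f32 s0, #1.0".  */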
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.cc), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon and MVE instructions.
   Legal immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
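/* For example (illustrative): the V4SI constant { 0x45, 0x45, 0x45, 0x45 }
   splats to the little-endian byte pattern 45 00 00 00 per element, which
   matches variant 0 above and so can be emitted as "vmov.i32 qN, #0x45";
   its bitwise complement { 0xffffffba, ... } would instead match the VMVN
   variant 6.  */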
static int
simd_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Only support 128-bit vectors for MVE.  */
  if (TARGET_HAVE_MVE
      && (!vector
	  || VALID_MVE_PRED_MODE (mode)
	  || n_elts * innersize != 16))
    return -1;

  if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
    return -1;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
   implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
   (or zero for float elements), and a modified constant (whatever should be
   output for a VMOV) in *MODCONST.  "neon_immediate_valid_for_move" function
   is renamed to "simd_immediate_valid_for_move" as this function will be used
   by both Neon and MVE.  */
int
simd_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See simd_valid_immediate for a description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT says whether the shift is a left or a right
   shift, because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
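/* For example (illustrative): for a V8HI operand the element size is 16
   bits, so a vector of eight identical shift counts of 5 is accepted for
   VSHL (valid range 0..15) and for VSHR (valid range 1..16), with
   *ELEMENTWIDTH set to 16 in both cases.  */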
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
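/* For illustration (not from the original source): reducing a V4SF with a
   pairwise-add REDUC takes log2(4) = 2 steps:
   { a, b, c, d } -> { a+b, c+d, ... } -> { a+b+c+d, ... },
   where only element 0 of the final vector is subsequently used.  */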
/* Return a non-NULL RTX iff VALS is a vector constant that can be
   loaded into a register using VDUP.

   If this is the case, and GENERATE is set, we also generate
   instructions to do this and return an RTX to assign to the register.  */

static rtx
neon_vdup_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  if (!generate)
    return x;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}
/* Return a HI representation of CONST_VEC suitable for MVE predicates.  */
rtx
mve_bool_vec_to_const (rtx const_vec)
{
  machine_mode mode = GET_MODE (const_vec);

  if (!VECTOR_MODE_P (mode))
    return const_vec;

  unsigned n_elts = GET_MODE_NUNITS (mode);
  unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
  unsigned shift_c = 16 / n_elts;
  unsigned i;
  int hi_val = 0;

  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (const_vec, i);
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el) & ((1U << el_prec) - 1);

      unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;

      hi_val |= elpart << (index * shift_c);
    }
  /* We are using mov immediate to encode this constant which writes 32-bits
     so we need to make sure the top 16-bits are all 0, otherwise we can't
     guarantee we can actually write this immediate.  */
  return gen_int_mode (hi_val, SImode);
}
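/* Worked example (illustrative): for a V4BI predicate { 1, 0, 1, 1 } on a
   little-endian target, n_elts = 4 so shift_c = 16 / 4 = 4 bits per
   element, and the packed value is
   (1 << 0) | (0 << 4) | (1 << 8) | (1 << 12) = 0x1101,
   i.e. each boolean lands at the bottom of its 4-bit group in the 16-bit
   result.  */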
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
   into a register.

   If this is the case, and GENERATE is set, we also generate code to do
   this and return an RTX to copy into the register.  */

rtx
neon_make_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
    return mve_bool_vec_to_const (const_vec);
  else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return arm_disable_literal_pool ? NULL_RTX : const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error_at (EXPR_LOCATION (exp),
		  "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB level is 2 if full writeback address modes are allowed, 1
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed and 0 if no writeback at all is supported.  */

static int
arm_coproc_mem_operand_wb (rtx op, int wb_level)
{
  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable with at least restricted writeback (subject to
     verification by arm_address_register_rtx_p); we need full
     writeback to accept PRE_INC and POST_DEC.  */
  if (wb_level > 0
      && (GET_CODE (ind) == POST_INC
	  || GET_CODE (ind) == PRE_DEC
	  || (wb_level > 1
	      && (GET_CODE (ind) == PRE_INC
		  || GET_CODE (ind) == POST_DEC))))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb_level > 1
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).

     The encoded immediate for 16-bit modes is multiplied by 2,
     while the encoded immediate for 32-bit and 64-bit modes is
     multiplied by 4.  */
  int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
      && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int arm_coproc_mem_operand (rtx op, bool wb)
{
  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
}

/* Return TRUE if OP is a valid coprocessor memory address pattern in a
   context in which no writeback address modes are allowed.  */

int
arm_coproc_mem_operand_no_writeback (rtx op)
{
  return arm_coproc_mem_operand_wb (op, 0);
}
/* In non-STRICT mode, return the register number; in STRICT mode return
   the hard regno or the replacement if it won't be a mem.  Otherwise, return
   the original pseudo number.  */
static int
arm_effective_regno (rtx op, bool strict)
{
  gcc_assert (REG_P (op));
  if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
      || !reg_renumber || reg_renumber[REGNO (op)] < 0)
    return REGNO (op);
  return reg_renumber[REGNO (op)];
}
/* This function returns TRUE on matching mode and op.
   1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
   2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13).  */
int
mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
{
  enum rtx_code code;
  int val, reg_no;

  /* Match: (mem (reg)).  */
  if (REG_P (op))
    {
      reg_no = arm_effective_regno (op, strict);
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : reg_no < LAST_ARM_REGNUM)
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  code = GET_CODE (op);

  if ((code == POST_INC
       || code == PRE_DEC
       || code == PRE_INC
       || code == POST_DEC)
      && REG_P (XEXP (op, 0)))
    {
      reg_no = arm_effective_regno (XEXP (op, 0), strict);
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  else if (((code == POST_MODIFY || code == PRE_MODIFY)
	    && GET_CODE (XEXP (op, 1)) == PLUS
	    && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
	    && REG_P (XEXP (op, 0))
	    && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
	   /* Make sure to only accept PLUS after reload_completed, otherwise
	      this will interfere with auto_inc's pattern detection.  */
	   || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
	       && GET_CODE (XEXP (op, 1)) == CONST_INT))
    {
      reg_no = arm_effective_regno (XEXP (op, 0), strict);
      if (code == PLUS)
	val = INTVAL (XEXP (op, 1));
      else
	val = INTVAL (XEXP (XEXP (op, 1), 1));

      switch (mode)
	{
	case E_V16QImode:
	case E_V8QImode:
	case E_V4QImode:
	  if (abs (val) > 127)
	    return FALSE;
	  break;
	case E_V8HImode:
	case E_V8HFmode:
	case E_V4HImode:
	case E_V4HFmode:
	  if (val % 2 != 0 || abs (val) > 254)
	    return FALSE;
	  break;
	case E_V4SImode:
	case E_V4SFmode:
	  if (val % 4 != 0 || abs (val) > 508)
	    return FALSE;
	  break;
	default:
	  return FALSE;
	}
      return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
	      || (MVE_STN_LDW_MODE (mode)
		  ? reg_no <= LAST_LO_REGNUM
		  : (reg_no < LAST_ARM_REGNUM
		     && (code == PLUS || reg_no != SP_REGNUM))));
    }
  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1))
      && REG_P (XEXP (ind, 0))
      && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
   type.  */
int
mve_struct_mem_operand (rtx op)
{
  rtx ind = XEXP (op, 0);

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow only post-increment by the mode size.  */
  if (GET_CODE (ind) == POST_INC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */

void
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || VIRTUAL_REGISTER_P (x));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (SYMBOL_REF_P (x))
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
14024 label_mentioned_p (rtx x
)
14029 if (LABEL_REF_P (x
))
14032 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14033 instruction, but they are constant offsets, not symbols. */
14034 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
14037 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
14038 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
14044 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
14045 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
14048 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
14056 tls_mentioned_p (rtx x
)
14058 switch (GET_CODE (x
))
14061 return tls_mentioned_p (XEXP (x
, 0));
14064 if (XINT (x
, 1) == UNSPEC_TLS
)
14067 /* Fall through. */
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }
  return false;
}
)
14116 enum rtx_code code
= GET_CODE (x
);
14129 gcc_unreachable ();
14133 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14136 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
14137 int *mask
, bool *signed_sat
)
14139 /* The high bound must be a power of two minus one. */
14140 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
14144 /* The low bound is either zero (for usat) or one less than the
14145 negation of the high bound (for ssat). */
14146 if (INTVAL (lo_bound
) == 0)
14151 *signed_sat
= false;
14156 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
14161 *signed_sat
= true;
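/* For example (illustrative): the bounds pair (0, 255) matches with
   *MASK = 8 and *SIGNED_SAT = false, i.e. "usat Rd, #8, Rm", while
   (-256, 255) matches with *MASK = 9 and *SIGNED_SAT = true, i.e.
   "ssat Rd, #9, Rm".  */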
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
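/* For illustration (not from the original source): MEMs addressing
   [r4, #8] and [r4, #12] are adjacent (same base register, offsets
   differing by exactly 4), so the pair is a candidate for an ldm-style
   combination, whereas [r4, #8] with [r5, #12], or offsets differing
   by 8, are not.  */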
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
   or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
   following form:

   [(set (reg:SI <N>) (const_int 0))
    (set (reg:SI <M>) (const_int 0))
    ...
    (unspec_volatile [(const_int 0)]
		     VUNSPEC_CLRM_APSR)
    (clobber (reg:CC CC_REGNUM))
   ]

   Any number (including 0) of set expressions is valid, the volatile unspec is
   optional.  All registers but SP and PC are allowed and registers must be in
   strict increasing order.

   To be a valid VSCCLRM pattern, OP must have the following form:

   [(unspec_volatile [(const_int 0)]
		     VUNSPEC_VSCCLRM_VPR)
    (set (reg:SF <N>) (const_int 0))
    (set (reg:SF <M>) (const_int 0))
    ...
   ]

   As with CLRM, any number (including 0) of set expressions is valid, however
   the volatile unspec is mandatory here.  Any VFP single-precision register is
   accepted but all registers must be consecutive and in increasing order.  */

bool
clear_operation_p (rtx op, bool vfp)
{
  unsigned regno;
  unsigned last_regno = INVALID_REGNUM;
  rtx elt, reg, zero;
  int count = XVECLEN (op, 0);
  int first_set = vfp ? 1 : 0;
  machine_mode expected_mode = vfp ? E_SFmode : E_SImode;

  for (int i = first_set; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);

      if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
	{
	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
	      || XVECLEN (elt, 0) != 1
	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
	      || i != count - 2)
	    return false;

	  continue;
	}

      if (GET_CODE (elt) == CLOBBER)
	continue;

      if (GET_CODE (elt) != SET)
	return false;

      reg = SET_DEST (elt);
      zero = SET_SRC (elt);

      if (!REG_P (reg)
	  || GET_MODE (reg) != expected_mode
	  || zero != CONST0_RTX (SImode))
	return false;

      regno = REGNO (reg);

      if (vfp)
	{
	  if (i != first_set && regno != last_regno + 1)
	    return false;
	}
      else
	{
	  if (regno == SP_REGNUM || regno == PC_REGNUM)
	    return false;
	  if (i != first_set && regno <= last_regno)
	    return false;
	}

      last_regno = regno;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
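/* A worked example of the first early-out above (illustrative): with
   nops == 2 on a StrongARM (arm_ld_sched) and a base offset that needs an
   extra add, the candidate

	add	r0, rbase, #8
	ldmia	r0, {r0, r1}

   takes 3 cycles plus stalls instead of 2 for the plain ldr pair, so this
   function answers false and the peephole keeps the original two loads.  */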
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
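/* Worked example (illustrative only): for unsorted_offsets = {4, 0, 8} the
   caller seeds order[0] = 1 (the offset 0); the loop then finds offset 4 at
   index 0 and offset 8 at index 2, giving order = {1, 0, 2}.  Offsets
   {0, 8, 12} fail, because no entry is exactly 0 + 4.  */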
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
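/* Summary of the return values above, for reference when reading the
   peephole expanders below: 1 = ldmia, 2 = ldmib, 3 = ldmda, 4 = ldmdb,
   5 = the base needs a preliminary add before an ldmia can be used, and
   0 = no multiple load is possible or profitable.  For illustration,
   offsets {0, 4, 8, 12} from r3 loading r4-r7 would yield case 1, i.e.
   "ldmia r3, {r4-r7}" (hypothetical registers).  */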
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
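/* For illustration (hypothetical operands): with count == 2, regs = {4, 5}
   and a writeback offset of 8, the parallel built above has the shape

     (parallel [(set (reg:SI rb) (plus:SI (reg:SI rb) (const_int 8)))
		(set (reg:SI 4) (mem:SI ...))
		(set (reg:SI 5) (mem:SI ...))])

   which is the form the load-multiple patterns in the machine description
   are written to match.  */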
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.*/

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
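/* Typical use, mirroring the block-copy code further down this file:

     arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase, &srcoffset)

   emits a 4-word load with writeback and advances *offsetp by 16 so that
   the caller's MEM_OFFSET bookkeeping stays in step with the base
   register.  */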
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];

	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
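/* For illustration, the transformation this peephole enables (register
   numbers are hypothetical):

	ldr	r1, [r0]
	ldr	r2, [r0, #4]
   ==>
	ldmia	r0, {r1, r2}

   subject to the ordering, profitability and liveness checks above.  */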
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
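/* Worked example (illustrative): length == 11 with interleave_factor == 1
   copies two 4-byte blocks in the main loop, leaving remaining == 3; no
   whole words remain (words == 0), so one halfword and then one byte are
   copied, ending with remaining == 0 and srcoffset == dstoffset == 11 for
   the final assert.  */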
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover,
				       interleave_factor);
}
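/* For illustration: a 70-byte copy with bytes_per_iter == 16 runs the loop
   four times (64 bytes) and then hands the 6 left-over bytes to
   arm_block_move_unaligned_straight.  */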
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_cpymemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 redundant.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_cpymemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_cpymemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
   by its mode size.  */
inline static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
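/* E.g. given an SImode mem at [base], next_consecutive_mem returns an
   SImode mem at [base + 4] with MEM_OFFSET adjusted to match; for an
   HImode mem the step is 2 bytes.  */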
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_cpymem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_cpymemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx first_reg = NULL_RTX;
      rtx second_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  if (BYTES_BIG_ENDIAN)
	    {
	      second_reg = gen_lowpart (SImode, reg0);
	      first_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	  else
	    {
	      first_reg = gen_lowpart (SImode, reg0);
	      second_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	}
      if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
	emit_move_insn (reg0, src);
      else if (src_aligned)
	emit_insn (gen_unaligned_loaddi (reg0, src));
      else
	{
	  emit_insn (gen_unaligned_loadsi (first_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (second_reg, src));
	}

      if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
	emit_move_insn (dst, reg0);
      else if (dst_aligned)
	emit_insn (gen_unaligned_storedi (dst, reg0));
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, first_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, second_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
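/* For illustration: a 14-byte copy with both buffers word-aligned emits one
   DImode ldrd/strd pair (8 bytes), then one SImode word copy (4 bytes), and
   finally one HImode copy (2 bytes), with no byte tail.  */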
/* Decompose operands for a 64-bit binary operation in OP1 and OP2
   into its component 32-bit subregs.  OP2 may be an immediate
   constant and we want to simplify it in that case.  */
void
arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
			rtx *lo_op2, rtx *hi_op2)
{
  *lo_op1 = gen_lowpart (SImode, op1);
  *hi_op1 = gen_highpart (SImode, op1);
  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_lowpart_offset (SImode, DImode));
  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_highpart_offset (SImode, DImode));
}
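/* For illustration: decomposing a DImode register against
   (const_int 0x100000001) yields the two SImode halves of the register for
   OP1 and, since simplify_gen_subreg folds constants immediately,
   (const_int 1) for both *LO_OP2 and *HI_OP2.  */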
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT: return CC_DLTmode;
	case LE: return CC_DLEmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT: return CC_DGTmode;
	case GE: return CC_DGEmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
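/* For illustration: for (ior (lt ...) (le ...)), LT dominates LE (LT true
   implies LE true), so CC_DLEmode is chosen; for (ior (lt ...) (eq ...))
   neither condition dominates the other, so CCmode is returned and the
   combined RTL pattern simply fails to match.  */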
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (SUBREG_P (y)))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* A widened compare of the sum of a value plus a carry against a
     constant.  This is a representation of RSC.  We want to swap the
     result of the comparison at output.  Not valid if the Z bit is
     needed.  */
  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && arm_borrow_operation (XEXP (x, 1), DImode)
      && CONST_INT_P (y)
      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	   && (op == LE || op == GT))
	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      && (op == LEU || op == GTU))))
    return CC_SWPmode;

  /* If X is a constant we want to use CC_RSBmode.  This is
     non-canonical, but arm_gen_compare_reg uses this to generate the
     correct canonical form.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || SUBREG_P (y))
      && CONST_INT_P (x))
    return CC_RSBmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (SUBREG_P (y)))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NZmode;

  /* A comparison of ~reg with a const is really a special
     canoncialization of compare (~const, reg), which is a reverse
     subtract operation.  We may not get here if CONST is 0, but that
     doesn't matter because ~0 isn't a valid immediate for RSB.  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == NOT
      && CONST_INT_P (y))
    return CC_RSBmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
      && CONST_INT_P (y)
      && UINTVAL (y) == 0x800000000
      && (op == GEU || op == LTU))
    return CC_ADCmode;

  if (GET_MODE (x) == DImode
      && (op == GE || op == LT)
      && GET_CODE (x) == SIGN_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_NVmode;

  if (GET_MODE (x) == DImode
      && (op == GEU || op == LTU)
      && GET_CODE (x) == ZERO_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_Bmode;

  if (GET_MODE (x) == DImode
      && (op == EQ || op == NE)
      && (GET_CODE (x) == PLUS
	  || GET_CODE (x) == MINUS)
      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
      && GET_CODE (y) == SIGN_EXTEND
      && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
    return CC_Vmode;

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two (DImode) things to compare for the condition CODE.  Emit
   the sequence of instructions needed to generate a suitable condition
   code register.  Return the CC register result.  */

static rtx
arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;

  /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
  gcc_assert (TARGET_32BIT);
  gcc_assert (!CONST_INT_P (x));

  rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
				  subreg_highpart_offset (SImode, DImode));
  rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
				  subreg_highpart_offset (SImode, DImode));
  switch (code)
    {
    case EQ:
    case NE:
      {
	if (y_lo == const0_rtx || y_hi == const0_rtx)
	  {
	    if (y_lo != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		gcc_assert (y_hi == const0_rtx);
		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
		if (!arm_add_operand (y_lo, SImode))
		  y_lo = force_reg (SImode, y_lo);
		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
		x_lo = scratch2;
	      }
	    else if (y_hi != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
		if (!arm_add_operand (y_hi, SImode))
		  y_hi = force_reg (SImode, y_hi);
		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
		x_hi = scratch2;
	      }

	    if (!scratch)
	      {
		gcc_assert (!reload_completed);
		scratch = gen_rtx_SCRATCH (SImode);
	      }

	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

	    rtx set
	      = gen_rtx_SET (cc_reg,
			     gen_rtx_COMPARE (CC_NZmode,
					      gen_rtx_IOR (SImode, x_lo, x_hi),
					      const0_rtx));
	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
							      clobber)));
	    return cc_reg;
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	if (!arm_add_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
	cc_reg = gen_rtx_REG (mode, CC_REGNUM);

	emit_insn (gen_rtx_SET (cc_reg,
				gen_rtx_COMPARE (mode, conjunction,
						 const0_rtx)));
	return cc_reg;
      }

    case LT:
    case GE:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);

	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
							   cmp1));
	else if (CONST_INT_P (y_hi))
	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
							     y_hi, cmp1));
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
							 cmp1));
	return SET_DEST (single_set (insn));
      }

    case LE:
    case GT:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
								 x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    case LTU:
    case GEU:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);
	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
							  cmp1));
	else if (CONST_INT_P (y_hi))
	  {
	    /* Constant is viewed as unsigned when zero-extended.  */
	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
							      y_hi, cmp1));
	  }
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
							cmp1));
	return SET_DEST (single_set (insn));
      }

    case LEU:
    case GTU:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
								x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    default:
      gcc_unreachable ();
    }
}
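/* Note on the structure above (summary, not new behaviour): EQ/NE fold the
   two words with IOR when one half of Y is zero, LT/GE and LTU/GEU chain a
   low-word compare into a carry-in compare of the high words, and LE/GT and
   LEU/GTU fall back to a reverse subtract and compare (RSBS/RSC) when the
   constant prefers that form.  */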
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    return arm_gen_dicompare_reg (code, x, y, scratch);

  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
  if (mode == CC_RSBmode)
    {
      if (!scratch)
	scratch = gen_rtx_SCRATCH (SImode);
      emit_insn (gen_rsb_imm_compare_scratch (scratch,
					      GEN_INT (~UINTVAL (x)), y));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
16567 /* Generate a sequence of insns that will generate the correct return
16568 address mask depending on the physical architecture that the program
16571 arm_gen_return_addr_mask (void)
16573 rtx reg
= gen_reg_rtx (Pmode
);
16575 emit_insn (gen_return_addr_mask (reg
));
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
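/* Worked example for the hi/lo split above (illustrative): an offset of
   0x1234 gives lo = 0x234 and hi = 0x1000, so the pre-load adds 0x1000
   to the base and the byte accesses use offsets 0x234 and 0x235.  For
   the corner case offset == 4095, lo is first reduced to 2047
   (4095 & 0x7ff) so that lo + 1 stays within the valid -4095..4095
   range, leaving hi == 2048 for the pre-loading insn.  */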
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */

void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
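/* Illustrative byte-store sequence for the synthesis above, storing the
   half-word 0xABCD on a little-endian target (a sketch, not from the
   original sources; REG, SCRATCH and BASE are placeholder names):
	strb	REG, [BASE]		@ stores 0xCD
	lsr	SCRATCH, REG, #8
	strb	SCRATCH, [BASE, #1]	@ stores 0xAB
   On a big-endian target the two byte addresses are swapped.  */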
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}


/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}


/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;
  else if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
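/* For example (illustrative, assuming the limits above): offset 252 is
   accepted in both states; offset 1020 is accepted in Thumb-2 state
   only; offset 250 is accepted in ARM state but rejected in Thumb-2
   state because it is not a multiple of 4.  */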
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the range, and that the gap between
   the offsets is 4.  If reload is complete, also checks that the registers
   are legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   architecture.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}

/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (SUBREG_P (mem))
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
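/* For instance (illustrative): with unaligned_access enabled, a
   word-aligned access at offset 4 is allowed; with it disabled, the
   same access needs doubleword alignment and an offset that is a
   multiple of 8, so offset 4 would be rejected.  */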
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is a load or a store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	str r1, [r2]
	mov r1, 1
	str r1, [r2, #4]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free?  */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, r1, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));

      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)
	return false;

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

      return (operands_ok_ldrd_strd (operands[0], operands[1],
				     base, offset,
				     false, load));
    }

  return false;
}
/* Return true if parallel execution of the two word-size accesses provided
   could be satisfied with a single LDRD/STRD instruction.  Two word-size
   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
   register operands and OPERANDS[2,3] are the corresponding memory operands.
   LOAD indicates whether the access is a load or a store.  */
bool
valid_operands_ldrd_strd (rtx *operands, bool load)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset;
  int i, gap;

  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	return false;
    }

  if (offsets[0] > offsets[1])
    return false;

  gap = offsets[1] - offsets[0];
  offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				false, load);
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      {
	char fpstr[20];
	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			 sizeof (fpstr), 0, 1);
	fprintf (f, "%s", fpstr);
      }
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT     address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
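/* For example (illustrative): MINIPOOL_FIX_SIZE (HImode) is 4, since a
   2-byte value is padded out to a full word in the pool, while
   MINIPOOL_FIX_SIZE (DFmode) is 8, the natural size of the value.  */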
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Emit insns to load the function address from FUNCDESC (an FDPIC
   function descriptor) into a register and the GOT address into the
   FDPIC register, returning an rtx for the register holding the
   function address.  */

rtx
arm_load_function_descriptor (rtx funcdesc)
{
  rtx fnaddr_reg = gen_reg_rtx (Pmode);
  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

  emit_move_insn (fnaddr_reg, fnaddr);

  /* The ABI requires the entry point address to be loaded first, but
     since we cannot support lazy binding for lack of atomic load of
     two 32-bits values, we do not need to bother to prevent the
     previous load from being moved after that of the GOT address.  */
  emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));

  return fnaddr_reg;
}

/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label).levels[0].log;
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
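/* For instance (illustrative): with an 8-byte label alignment in Thumb
   state the smallest instruction is 2 bytes, so up to 8 - 2 = 6 bytes
   of padding may precede the label; in ARM state the same alignment
   yields at most 8 - 4 = 4 bytes.  */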
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
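/* As an illustration (the exact reach comes from the insn's pool_range
   attribute, so treat these numbers as an example only): an ARM-state
   LDR can reach on the order of 4K bytes forwards, so a fix at
   address 100 with forwards == 4096 and minipool_pad == 0 gets
   max_address == 4196, and its pool entry must be emitted before that
   point in the function.  */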
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
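/* Illustration of the resulting ordering (derived from the costs
   above): a point just after an unconditional jump costs 40
   (base 50 - 10), an ordinary insn or call costs 50, anything else
   costs 60, and a following label subtracts a further 20, so barriers
   gravitate towards existing control-flow breaks.  */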
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}

/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}

/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
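/* Worked example (illustrative): for the 64-bit constant
   0x0000000100000001 each 32-bit half is 1, a single valid immediate,
   so arm_const_double_inline_cost returns 1 + 1 = 2.  That is within
   the limit of 3 returned by arm_max_const_double_inline_cost when
   optimizing for size, so the constant would be synthesized inline
   rather than placed in a literal pool.  */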
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}

/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit
   this argument or field starts at is passed in STARTING_BIT and the last
   used bit is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 fields size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i=*regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
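/* Worked example (illustrative): for a struct { char a; short b; }
   passed in r0, field 'b' starts at bit 16 while 'a' ends at bit 8, so
   bits 8-15 are padding.  The mask computed above is
   (0xffffffff >> 16) - ((1 << 8) - 1) = 0x0000ff00, which is recorded
   in padding_bits_to_clear[0], and r0 is marked in the returned
   not-to-clear mask.  */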
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and pass this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)
{
18684 int last_used_bit
= 0;
18685 unsigned HOST_WIDE_INT not_to_clear_mask
;
18687 if (RECORD_OR_UNION_TYPE_P (arg_type
))
18690 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
18691 padding_bits_to_clear
, 0,
/* If the 'last_used_bit' is not zero, that means we are still using a
   part of the last 'regno'.  In such cases we must clear the trailing
   bits.  Otherwise we are not using regno and we should mark it as to
   clear.  */
18699 if (last_used_bit
!= 0)
18700 padding_bits_to_clear
[regno
]
18701 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
18703 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
18707 not_to_clear_mask
= 0;
/* We are not dealing with structs or unions, so these arguments may
   be passed in floating point registers too.  In some cases a BLKmode
   is used when returning or passing arguments in multiple VFP
   registers.  */
18711 if (GET_MODE (arg_rtx
) == BLKmode
)
/* This should really only occur when dealing with the hard-float
   ABI.  */
18718 gcc_assert (TARGET_HARD_FLOAT_ABI
);
18720 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
18722 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
18723 gcc_assert (REG_P (reg
));
18725 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
18727 /* If we are dealing with DF mode, make sure we don't
18728 clear either of the registers it addresses. */
18729 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
18732 unsigned HOST_WIDE_INT mask
;
18733 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
18734 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
18735 not_to_clear_mask
|= mask
;
18741 /* Otherwise we can rely on the MODE to determine how many registers
18742 are being used by this argument. */
18743 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
18744 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18747 unsigned HOST_WIDE_INT
18748 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
18749 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18750 not_to_clear_mask
|= mask
;
18755 return not_to_clear_mask
;
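/* Worked example (added for exposition; the struct is hypothetical): for
   an argument of type struct { uint16_t a; uint32_t b; } passed in r0-r1
   under the AAPCS, this function sets bits 0 and 1 of the returned mask
   (r0 and r1 carry the argument) and records the alignment padding
   between 'a' and 'b' as padding_bits_to_clear[0] == 0xffff0000, so only
   those 16 bits of r0 get scrubbed before the nonsecure call.  */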
/* Clear secret data from registers before doing a cmse_nonsecure_call or
   returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates
   which registers are to be fully cleared, using the value in register
   CLEARING_REG if more efficient.  The PADDING_BITS_LEN entries array
   PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in
   caller-saved core registers, with SCRATCH_REG used as a scratch register
   for that clearing.

   NOTE: one of the three following conditions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (ie. its bit is
     set in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */

static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
18775 bool saved_clearing
= false;
18776 rtx saved_clearing_reg
= NULL_RTX
;
18777 int i
, regno
, clearing_regno
, minregno
= R0_REGNUM
, maxregno
= minregno
- 1;
18779 gcc_assert (arm_arch_cmse
);
18781 if (!bitmap_empty_p (to_clear_bitmap
))
18783 minregno
= bitmap_first_set_bit (to_clear_bitmap
);
18784 maxregno
= bitmap_last_set_bit (to_clear_bitmap
);
18786 clearing_regno
= REGNO (clearing_reg
);
18788 /* Clear padding bits. */
18789 gcc_assert (padding_bits_len
<= NUM_ARG_REGS
);
18790 for (i
= 0, regno
= R0_REGNUM
; i
< padding_bits_len
; i
++, regno
++)
18793 rtx rtx16
, dest
, cleared_reg
= gen_rtx_REG (SImode
, regno
);
18795 if (padding_bits_to_clear
[i
] == 0)
18798 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18799 CLEARING_REG as scratch. */
18801 && REGNO (scratch_reg
) > LAST_LO_REGNUM
)
18803 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18804 such that we can use clearing_reg to clear the unused bits in the
18806 if ((clearing_regno
> maxregno
18807 || !bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18808 && !saved_clearing
)
18810 gcc_assert (clearing_regno
<= LAST_LO_REGNUM
);
18811 emit_move_insn (scratch_reg
, clearing_reg
);
18812 saved_clearing
= true;
18813 saved_clearing_reg
= scratch_reg
;
18815 scratch_reg
= clearing_reg
;
18818 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18819 mask
= (~padding_bits_to_clear
[i
]) & 0xFFFF;
18820 emit_move_insn (scratch_reg
, gen_int_mode (mask
, SImode
));
18822 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18823 mask
= (~padding_bits_to_clear
[i
]) >> 16;
18824 rtx16
= gen_int_mode (16, SImode
);
18825 dest
= gen_rtx_ZERO_EXTRACT (SImode
, scratch_reg
, rtx16
, rtx16
);
18827 emit_insn (gen_rtx_SET (dest
, gen_int_mode (mask
, SImode
)));
18829 emit_insn (gen_andsi3 (cleared_reg
, cleared_reg
, scratch_reg
));
18831 if (saved_clearing
)
18832 emit_move_insn (clearing_reg
, saved_clearing_reg
);
18835 /* Clear full registers. */
18837 if (TARGET_HAVE_FPCXT_CMSE
)
18840 int i
, j
, k
, nb_regs
;
18841 rtx use_seq
, par
, reg
, set
, vunspec
;
18842 int to_clear_bitmap_size
= SBITMAP_SIZE (to_clear_bitmap
);
18843 auto_sbitmap
core_regs_bitmap (to_clear_bitmap_size
);
18844 auto_sbitmap
to_clear_core_bitmap (to_clear_bitmap_size
);
18846 for (i
= FIRST_VFP_REGNUM
; i
<= maxregno
; i
+= nb_regs
)
18848 /* Find next register to clear and exit if none. */
18849 for (; i
<= maxregno
&& !bitmap_bit_p (to_clear_bitmap
, i
); i
++);
18853 /* Compute number of consecutive registers to clear. */
18854 for (j
= i
; j
<= maxregno
&& bitmap_bit_p (to_clear_bitmap
, j
);
18858 /* Create VSCCLRM RTX pattern. */
18859 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 1));
18860 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18861 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18862 VUNSPEC_VSCCLRM_VPR
);
18863 XVECEXP (par
, 0, 0) = vunspec
;
18865 /* Insert VFP register clearing RTX in the pattern. */
18867 for (k
= 1, j
= i
; j
<= maxregno
&& k
< nb_regs
+ 1; j
++)
18869 if (!bitmap_bit_p (to_clear_bitmap
, j
))
18872 reg
= gen_rtx_REG (SFmode
, j
);
18873 set
= gen_rtx_SET (reg
, const0_rtx
);
18874 XVECEXP (par
, 0, k
++) = set
;
18877 use_seq
= get_insns ();
18880 emit_insn_after (use_seq
, emit_insn (par
));
18883 /* Get set of core registers to clear. */
18884 bitmap_clear (core_regs_bitmap
);
18885 bitmap_set_range (core_regs_bitmap
, R0_REGNUM
,
18886 IP_REGNUM
- R0_REGNUM
+ 1);
18887 bitmap_and (to_clear_core_bitmap
, to_clear_bitmap
,
18889 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap
));
18891 if (bitmap_empty_p (to_clear_core_bitmap
))
18894 /* Create clrm RTX pattern. */
18895 nb_regs
= bitmap_count_bits (to_clear_core_bitmap
);
18896 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 2));
18898 /* Insert core register clearing RTX in the pattern. */
18900 for (j
= 0, i
= minregno
; j
< nb_regs
; i
++)
18902 if (!bitmap_bit_p (to_clear_core_bitmap
, i
))
18905 reg
= gen_rtx_REG (SImode
, i
);
18906 set
= gen_rtx_SET (reg
, const0_rtx
);
18907 XVECEXP (par
, 0, j
++) = set
;
/* Insert APSR register clearing RTX in the pattern along with
   clobbering CC.  */
18913 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18914 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18915 VUNSPEC_CLRM_APSR
);
18917 XVECEXP (par
, 0, j
++) = vunspec
;
18919 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
18920 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
18921 XVECEXP (par
, 0, j
) = clobber
;
18923 use_seq
= get_insns ();
18926 emit_insn_after (use_seq
, emit_insn (par
));
/* If not marked for clearing, clearing_reg already does not contain
   any secret.  */
18932 if (clearing_regno
<= maxregno
18933 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18935 emit_move_insn (clearing_reg
, const0_rtx
);
18936 emit_use (clearing_reg
);
18937 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
18940 for (regno
= minregno
; regno
<= maxregno
; regno
++)
18942 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
18945 if (IS_VFP_REGNUM (regno
))
18947 /* If regno is an even vfp register and its successor is also to
18948 be cleared, use vmov. */
18949 if (TARGET_VFP_DOUBLE
18950 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
18951 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
18953 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
18954 CONST1_RTX (DFmode
));
18955 emit_use (gen_rtx_REG (DFmode
, regno
));
18960 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
18961 CONST1_RTX (SFmode
));
18962 emit_use (gen_rtx_REG (SFmode
, regno
));
18967 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
18968 emit_use (gen_rtx_REG (SImode
, regno
));
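/* Sketch of the code emitted by the padding-bit loop above (illustrative;
   register choices are hypothetical).  For
   padding_bits_to_clear[0] == 0xffff0000 it emits, in effect:

       movw  ip, #0xffff      @ low half of ~padding_bits_to_clear[0]
       movt  ip, #0x0000      @ high half, set via the ZERO_EXTRACT
       ands  r0, r0, ip       @ scrub the padding bits of r0

   before the fully-dead registers selected in TO_CLEAR_BITMAP are
   cleared.  */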
18974 /* Clear core and caller-saved VFP registers not used to pass arguments before
18975 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18976 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18977 libgcc/config/arm/cmse_nonsecure_call.S. */
18980 cmse_nonsecure_call_inline_register_clear (void)
18984 FOR_EACH_BB_FN (bb
, cfun
)
18988 FOR_BB_INSNS (bb
, insn
)
18990 bool clear_callee_saved
= TARGET_HAVE_FPCXT_CMSE
;
18991 /* frame = VFP regs + FPSCR + VPR. */
18992 unsigned lazy_store_stack_frame_size
18993 = (LAST_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1 + 2) * UNITS_PER_WORD
;
18994 unsigned long callee_saved_mask
18995 = ((1 << (LAST_HI_REGNUM
+ 1)) - 1)
18996 & ~((1 << (LAST_ARG_REGNUM
+ 1)) - 1);
18997 unsigned address_regnum
, regno
;
18998 unsigned max_int_regno
18999 = clear_callee_saved
? IP_REGNUM
: LAST_ARG_REGNUM
;
19000 unsigned max_fp_regno
19001 = TARGET_HAVE_FPCXT_CMSE
? LAST_VFP_REGNUM
: D7_VFP_REGNUM
;
19003 = TARGET_HARD_FLOAT_ABI
? max_fp_regno
: max_int_regno
;
19004 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
19006 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
19008 CUMULATIVE_ARGS args_so_far_v
;
19009 cumulative_args_t args_so_far
;
19010 tree arg_type
, fntype
;
19011 bool first_param
= true, lazy_fpclear
= !TARGET_HARD_FLOAT_ABI
;
19012 function_args_iterator args_iter
;
19013 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
19015 if (!NONDEBUG_INSN_P (insn
))
19018 if (!CALL_P (insn
))
19021 pat
= PATTERN (insn
);
19022 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
19023 call
= XVECEXP (pat
, 0, 0);
19025 /* Get the real call RTX if the insn sets a value, ie. returns. */
19026 if (GET_CODE (call
) == SET
)
19027 call
= SET_SRC (call
);
19029 /* Check if it is a cmse_nonsecure_call. */
19030 unspec
= XEXP (call
, 0);
19031 if (GET_CODE (unspec
) != UNSPEC
19032 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
/* Mark registers that need to be cleared.  Those that hold a
   parameter are removed from the set further below.  */
19037 bitmap_clear (to_clear_bitmap
);
19038 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
,
19039 max_int_regno
- R0_REGNUM
+ 1);
/* Only look at the caller-saved floating point registers in case of
   -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the lazy
   store and loads which clear both caller- and callee-saved
   registers.  */
19047 auto_sbitmap
float_bitmap (maxregno
+ 1);
19049 bitmap_clear (float_bitmap
);
19050 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
19051 max_fp_regno
- FIRST_VFP_REGNUM
+ 1);
19052 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
/* Make sure the register used to hold the function address is not
   cleared.  */
19057 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
19058 gcc_assert (MEM_P (address
));
19059 gcc_assert (REG_P (XEXP (address
, 0)));
19060 address_regnum
= REGNO (XEXP (address
, 0));
19061 if (address_regnum
<= max_int_regno
)
19062 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
19064 /* Set basic block of call insn so that df rescan is performed on
19065 insns inserted here. */
19066 set_block_for_insn (insn
, bb
);
19067 df_set_flags (DF_DEFER_INSN_RESCAN
);
/* Make sure the scheduler doesn't schedule other insns beyond
   this point.  */
19072 emit_insn (gen_blockage ());
/* Walk through all arguments and clear registers appropriately.  */
19076 fntype
= TREE_TYPE (MEM_EXPR (address
));
19077 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
19079 args_so_far
= pack_cumulative_args (&args_so_far_v
);
19080 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
19083 uint64_t to_clear_args_mask
;
19085 if (VOID_TYPE_P (arg_type
))
19088 function_arg_info
arg (arg_type
, /*named=*/true);
19090 /* ??? We should advance after processing the argument and pass
19091 the argument we're advancing past. */
19092 arm_function_arg_advance (args_so_far
, arg
);
19094 arg_rtx
= arm_function_arg (args_so_far
, arg
);
19095 gcc_assert (REG_P (arg_rtx
));
19097 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
19099 &padding_bits_to_clear
[0]);
19100 if (to_clear_args_mask
)
19102 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
19104 if (to_clear_args_mask
& (1ULL << regno
))
19105 bitmap_clear_bit (to_clear_bitmap
, regno
);
19109 first_param
= false;
19112 /* We use right shift and left shift to clear the LSB of the address
19113 we jump to instead of using bic, to avoid having to use an extra
19114 register on Thumb-1. */
19115 clearing_reg
= XEXP (address
, 0);
19116 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
19117 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19118 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
19119 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19121 if (clear_callee_saved
)
19124 emit_multi_reg_push (callee_saved_mask
, callee_saved_mask
);
19125 /* Disable frame debug info in push because it needs to be
19126 disabled for pop (see below). */
19127 RTX_FRAME_RELATED_P (push_insn
) = 0;
19129 /* Lazy store multiple. */
19133 rtx_insn
*add_insn
;
19135 imm
= gen_int_mode (- lazy_store_stack_frame_size
, SImode
);
19136 add_insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
19137 stack_pointer_rtx
, imm
));
19138 /* If we have the frame pointer, then it will be the
19139 CFA reg. Otherwise, the stack pointer is the CFA
19140 reg, so we need to emit a CFA adjust. */
19141 if (!frame_pointer_needed
)
19142 arm_add_cfa_adjust_cfa_note (add_insn
,
19143 - lazy_store_stack_frame_size
,
19145 stack_pointer_rtx
);
19146 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx
));
19148 /* Save VFP callee-saved registers. */
19151 vfp_emit_fstmd (D7_VFP_REGNUM
+ 1,
19152 (max_fp_regno
- D7_VFP_REGNUM
) / 2);
19153 /* Disable frame debug info in push because it needs to be
19154 disabled for vpop (see below). */
19155 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
/* Clear caller-saved registers that leak before doing a non-secure
   call.  */
19161 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
19162 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
19163 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
19165 seq
= get_insns ();
19167 emit_insn_before (seq
, insn
);
19169 if (TARGET_HAVE_FPCXT_CMSE
)
19171 rtx_insn
*last
, *pop_insn
, *after
= insn
;
19175 /* Lazy load multiple done as part of libcall in Armv8-M. */
19178 rtx imm
= gen_int_mode (lazy_store_stack_frame_size
, SImode
);
19179 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx
));
19180 rtx_insn
*add_insn
=
19181 emit_insn (gen_addsi3 (stack_pointer_rtx
,
19182 stack_pointer_rtx
, imm
));
19183 if (!frame_pointer_needed
)
19184 arm_add_cfa_adjust_cfa_note (add_insn
,
19185 lazy_store_stack_frame_size
,
19187 stack_pointer_rtx
);
19189 /* Restore VFP callee-saved registers. */
19192 int nb_callee_saved_vfp_regs
=
19193 (max_fp_regno
- D7_VFP_REGNUM
) / 2;
19194 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM
+ 1,
19195 nb_callee_saved_vfp_regs
,
19196 stack_pointer_rtx
);
19197 /* Disable frame debug info in vpop because the SP adjustment
19198 is made using a CFA adjustment note while CFA used is
19199 sometimes R7. This then causes an assert failure in the
19200 CFI note creation code. */
19201 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19204 arm_emit_multi_reg_pop (callee_saved_mask
);
19205 pop_insn
= get_last_insn ();
19207 /* Disable frame debug info in pop because they reset the state
19208 of popped registers to what it was at the beginning of the
19209 function, before the prologue. This leads to incorrect state
19210 when doing the pop after the nonsecure call for registers that
19211 are pushed both in prologue and before the nonsecure call.
19213 It also occasionally triggers an assert failure in CFI note
19214 creation code when there are two codepaths to the epilogue,
19215 one of which does not go through the nonsecure call.
Obviously this means that debugging between the push and pop is
   not reliable.  */
19218 RTX_FRAME_RELATED_P (pop_insn
) = 0;
19220 seq
= get_insns ();
19221 last
= get_last_insn ();
19224 emit_insn_after (seq
, after
);
/* Skip the pop we have just inserted after the nonsecure call; we know
   it does not contain a nonsecure call.  */
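/* Illustrative summary (added for exposition; the register choice is
   hypothetical) of the sequence emitted before a nonsecure call whose
   target address lives in r4:

       lsrs  r4, r4, #1       @ clear bit 0 of the address with a
       lsls  r4, r4, #1       @ shift pair instead of BIC, so Thumb-1
                              @ needs no extra register
       ...                    @ clear padding bits of argument registers
       ...                    @ and zero every caller-saved register not
                              @ carrying an argument

   with the callee-saved push/pop and the lazy VFP save/restore wrapped
   around the call when FPCXT_CMSE is available, as implemented above.  */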
19234 /* Rewrite move insn into subtract of 0 if the condition codes will
19235 be useful in next conditional jump insn. */
19238 thumb1_reorg (void)
19242 FOR_EACH_BB_FN (bb
, cfun
)
19245 rtx cmp
, op0
, op1
, set
= NULL
;
19246 rtx_insn
*prev
, *insn
= BB_END (bb
);
19247 bool insn_clobbered
= false;
19249 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
19250 insn
= PREV_INSN (insn
);
19252 /* Find the last cbranchsi4_insn in basic block BB. */
19253 if (insn
== BB_HEAD (bb
)
19254 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
19257 /* Get the register with which we are comparing. */
19258 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
19259 op0
= XEXP (cmp
, 0);
19260 op1
= XEXP (cmp
, 1);
19262 /* Check that comparison is against ZERO. */
19263 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
19266 /* Find the first flag setting insn before INSN in basic block BB. */
19267 gcc_assert (insn
!= BB_HEAD (bb
));
19268 for (prev
= PREV_INSN (insn
);
19270 && prev
!= BB_HEAD (bb
)
19272 || DEBUG_INSN_P (prev
)
19273 || ((set
= single_set (prev
)) != NULL
19274 && get_attr_conds (prev
) == CONDS_NOCOND
)));
19275 prev
= PREV_INSN (prev
))
19277 if (reg_set_p (op0
, prev
))
19278 insn_clobbered
= true;
19281 /* Skip if op0 is clobbered by insn other than prev. */
19282 if (insn_clobbered
)
19288 dest
= SET_DEST (set
);
19289 src
= SET_SRC (set
);
19290 if (!low_register_operand (dest
, SImode
)
19291 || !low_register_operand (src
, SImode
))
19294 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19295 in INSN. Both src and dest of the move insn are checked. */
19296 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
19298 dest
= copy_rtx (dest
);
19299 src
= copy_rtx (src
);
19300 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
19301 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
19302 INSN_CODE (prev
) = -1;
19303 /* Set test register in INSN to dest. */
19304 XEXP (cmp
, 0) = copy_rtx (dest
);
19305 INSN_CODE (insn
) = -1;
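/* Example of the rewrite (illustrative registers): the sequence

       movs  r1, r0
       cmp   r0, #0
       beq   .L1

   becomes

       subs  r1, r0, #0       @ flags now reflect a compare against zero
       beq   .L1

   so the explicit comparison can be dropped later.  */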
19310 /* Convert instructions to their cc-clobbering variant if possible, since
19311 that allows us to use smaller encodings. */
19314 thumb2_reorg (void)
19319 INIT_REG_SET (&live
);
19321 /* We are freeing block_for_insn in the toplev to keep compatibility
19322 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19323 compute_bb_for_insn ();
19326 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
19328 FOR_EACH_BB_FN (bb
, cfun
)
19330 if ((current_tune
->disparage_flag_setting_t16_encodings
19331 == tune_params::DISPARAGE_FLAGS_ALL
)
19332 && optimize_bb_for_speed_p (bb
))
19336 Convert_Action action
= SKIP
;
19337 Convert_Action action_for_partial_flag_setting
19338 = ((current_tune
->disparage_flag_setting_t16_encodings
19339 != tune_params::DISPARAGE_FLAGS_NEITHER
)
19340 && optimize_bb_for_speed_p (bb
))
19343 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
19344 df_simulate_initialize_backwards (bb
, &live
);
19345 FOR_BB_INSNS_REVERSE (bb
, insn
)
19347 if (NONJUMP_INSN_P (insn
)
19348 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
19349 && GET_CODE (PATTERN (insn
)) == SET
)
19352 rtx pat
= PATTERN (insn
);
19353 rtx dst
= XEXP (pat
, 0);
19354 rtx src
= XEXP (pat
, 1);
19355 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
19357 if (UNARY_P (src
) || BINARY_P (src
))
19358 op0
= XEXP (src
, 0);
19360 if (BINARY_P (src
))
19361 op1
= XEXP (src
, 1);
19363 if (low_register_operand (dst
, SImode
))
19365 switch (GET_CODE (src
))
/* Adding two registers and storing the result
   in the first source is already a 16-bit
   operation.  */
19371 if (rtx_equal_p (dst
, op0
)
19372 && register_operand (op1
, SImode
))
19375 if (low_register_operand (op0
, SImode
))
19377 /* ADDS <Rd>,<Rn>,<Rm> */
19378 if (low_register_operand (op1
, SImode
))
19380 /* ADDS <Rdn>,#<imm8> */
19381 /* SUBS <Rdn>,#<imm8> */
19382 else if (rtx_equal_p (dst
, op0
)
19383 && CONST_INT_P (op1
)
19384 && IN_RANGE (INTVAL (op1
), -255, 255))
19386 /* ADDS <Rd>,<Rn>,#<imm3> */
19387 /* SUBS <Rd>,<Rn>,#<imm3> */
19388 else if (CONST_INT_P (op1
)
19389 && IN_RANGE (INTVAL (op1
), -7, 7))
19392 /* ADCS <Rd>, <Rn> */
19393 else if (GET_CODE (XEXP (src
, 0)) == PLUS
19394 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
19395 && low_register_operand (XEXP (XEXP (src
, 0), 1),
19397 && COMPARISON_P (op1
)
19398 && cc_register (XEXP (op1
, 0), VOIDmode
)
19399 && maybe_get_arm_condition_code (op1
) == ARM_CS
19400 && XEXP (op1
, 1) == const0_rtx
)
19405 /* RSBS <Rd>,<Rn>,#0
19406 Not handled here: see NEG below. */
19407 /* SUBS <Rd>,<Rn>,#<imm3>
19409 Not handled here: see PLUS above. */
19410 /* SUBS <Rd>,<Rn>,<Rm> */
19411 if (low_register_operand (op0
, SImode
)
19412 && low_register_operand (op1
, SImode
))
19417 /* MULS <Rdm>,<Rn>,<Rdm>
19418 As an exception to the rule, this is only used
19419 when optimizing for size since MULS is slow on all
19420 known implementations. We do not even want to use
19421 MULS in cold code, if optimizing for speed, so we
19422 test the global flag here. */
19423 if (!optimize_size
)
19425 /* Fall through. */
19429 /* ANDS <Rdn>,<Rm> */
19430 if (rtx_equal_p (dst
, op0
)
19431 && low_register_operand (op1
, SImode
))
19432 action
= action_for_partial_flag_setting
;
19433 else if (rtx_equal_p (dst
, op1
)
19434 && low_register_operand (op0
, SImode
))
19435 action
= action_for_partial_flag_setting
== SKIP
19436 ? SKIP
: SWAP_CONV
;
19442 /* ASRS <Rdn>,<Rm> */
19443 /* LSRS <Rdn>,<Rm> */
19444 /* LSLS <Rdn>,<Rm> */
19445 if (rtx_equal_p (dst
, op0
)
19446 && low_register_operand (op1
, SImode
))
19447 action
= action_for_partial_flag_setting
;
19448 /* ASRS <Rd>,<Rm>,#<imm5> */
19449 /* LSRS <Rd>,<Rm>,#<imm5> */
19450 /* LSLS <Rd>,<Rm>,#<imm5> */
19451 else if (low_register_operand (op0
, SImode
)
19452 && CONST_INT_P (op1
)
19453 && IN_RANGE (INTVAL (op1
), 0, 31))
19454 action
= action_for_partial_flag_setting
;
19458 /* RORS <Rdn>,<Rm> */
19459 if (rtx_equal_p (dst
, op0
)
19460 && low_register_operand (op1
, SImode
))
19461 action
= action_for_partial_flag_setting
;
19465 /* MVNS <Rd>,<Rm> */
19466 if (low_register_operand (op0
, SImode
))
19467 action
= action_for_partial_flag_setting
;
19471 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19472 if (low_register_operand (op0
, SImode
))
19477 /* MOVS <Rd>,#<imm8> */
19478 if (CONST_INT_P (src
)
19479 && IN_RANGE (INTVAL (src
), 0, 255))
19480 action
= action_for_partial_flag_setting
;
19484 /* MOVS and MOV<c> with registers have different
19485 encodings, so are not relevant here. */
19493 if (action
!= SKIP
)
19495 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
19496 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
19499 if (action
== SWAP_CONV
)
19501 src
= copy_rtx (src
);
19502 XEXP (src
, 0) = op1
;
19503 XEXP (src
, 1) = op0
;
19504 pat
= gen_rtx_SET (dst
, src
);
19505 vec
= gen_rtvec (2, pat
, clobber
);
19507 else /* action == CONV */
19508 vec
= gen_rtvec (2, pat
, clobber
);
19510 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
19511 INSN_CODE (insn
) = -1;
19515 if (NONDEBUG_INSN_P (insn
))
19516 df_simulate_one_insn_backwards (bb
, insn
, &live
);
19520 CLEAR_REG_SET (&live
);
/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
19531 HOST_WIDE_INT address
= 0;
19535 cmse_nonsecure_call_inline_register_clear ();
19537 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19538 if (cfun
->is_thunk
)
19540 else if (TARGET_THUMB1
)
19542 else if (TARGET_THUMB2
)
19545 /* Ensure all insns that must be split have been split at this point.
19546 Otherwise, the pool placement code below may compute incorrect
19547 insn lengths. Note that when optimizing, all insns have already
19548 been split at this point. */
19550 split_all_insns_noflow ();
19552 /* Make sure we do not attempt to create a literal pool even though it should
19553 no longer be necessary to create any. */
19554 if (arm_disable_literal_pool
)
19557 minipool_fix_head
= minipool_fix_tail
= NULL
;
19559 /* The first insn must always be a note, or the code below won't
19560 scan it properly. */
19561 insn
= get_insns ();
19562 gcc_assert (NOTE_P (insn
));
19565 /* Scan all the insns and record the operands that will need fixing. */
19566 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
19568 if (BARRIER_P (insn
))
19569 push_minipool_barrier (insn
, address
);
19570 else if (INSN_P (insn
))
19572 rtx_jump_table_data
*table
;
19574 note_invalid_constants (insn
, address
, true);
19575 address
+= get_attr_length (insn
);
19577 /* If the insn is a vector jump, add the size of the table
19578 and skip the table. */
19579 if (tablejump_p (insn
, NULL
, &table
))
19581 address
+= get_jump_table_size (table
);
19585 else if (LABEL_P (insn
))
19586 /* Add the worst-case padding due to alignment. We don't add
19587 the _current_ padding because the minipool insertions
19588 themselves might change it. */
19589 address
+= get_label_padding (insn
);
19592 fix
= minipool_fix_head
;
19594 /* Now scan the fixups and perform the required changes. */
19599 Mfix
* last_added_fix
;
19600 Mfix
* last_barrier
= NULL
;
19603 /* Skip any further barriers before the next fix. */
19604 while (fix
&& BARRIER_P (fix
->insn
))
19607 /* No more fixes. */
19611 last_added_fix
= NULL
;
19613 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
19615 if (BARRIER_P (ftmp
->insn
))
19617 if (ftmp
->address
>= minipool_vector_head
->max_address
)
19620 last_barrier
= ftmp
;
19622 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
19625 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
19628 /* If we found a barrier, drop back to that; any fixes that we
19629 could have reached but come after the barrier will now go in
19630 the next mini-pool. */
19631 if (last_barrier
!= NULL
)
19633 /* Reduce the refcount for those fixes that won't go into this
19635 for (fdel
= last_barrier
->next
;
19636 fdel
&& fdel
!= ftmp
;
19639 fdel
->minipool
->refcount
--;
19640 fdel
->minipool
= NULL
;
19643 ftmp
= last_barrier
;
/* ftmp is the first fix that we can't fit into this pool and
   there are no natural barriers that we could use.  Insert a
   new barrier in the code somewhere between the previous
   fix and this one, and arrange to jump around it.  */
19651 HOST_WIDE_INT max_address
;
19653 /* The last item on the list of fixes must be a barrier, so
19654 we can never run off the end of the list of fixes without
19655 last_barrier being set. */
19658 max_address
= minipool_vector_head
->max_address
;
19659 /* Check that there isn't another fix that is in range that
19660 we couldn't fit into this pool because the pool was
19661 already too large: we need to put the pool before such an
19662 instruction. The pool itself may come just after the
19663 fix because create_fix_barrier also allows space for a
19664 jump instruction. */
19665 if (ftmp
->address
< max_address
)
19666 max_address
= ftmp
->address
+ 1;
19668 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
19671 assign_minipool_offsets (last_barrier
);
19675 if (!BARRIER_P (ftmp
->insn
)
19676 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
19683 /* Scan over the fixes we have identified for this pool, fixing them
19684 up and adding the constants to the pool itself. */
19685 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
19686 this_fix
= this_fix
->next
)
19687 if (!BARRIER_P (this_fix
->insn
))
19690 = plus_constant (Pmode
,
19691 gen_rtx_LABEL_REF (VOIDmode
,
19692 minipool_vector_label
),
19693 this_fix
->minipool
->offset
);
19694 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
19697 dump_minipool (last_barrier
->insn
);
19701 /* From now on we must synthesize any constants that we can't handle
19702 directly. This can happen if the RTL gets split during final
19703 instruction generation. */
19704 cfun
->machine
->after_arm_reorg
= 1;
19706 /* Free the minipool memory. */
19707 obstack_free (&minipool_obstack
, minipool_startobj
);
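/* Background note (added for exposition): the minipool machinery above
   exists because a pc-relative 'ldr rN, [pc, #imm]' can only reach about
   4KB around the pc in ARM state (less in Thumb).  An out-of-range
   constant is therefore dumped into a pool after a nearby barrier, e.g.:

       ldr   r0, .LPOOL0      @ short-range pc-relative load
       b     .Lskip           @ jump around the pool
   .LPOOL0:
       .word 0x12345678
   .Lskip:
  */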
19710 /* Routines to output assembly language. */
/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset = 0;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
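/* Usage example (hypothetical operands): popping {r4, r5, pc} with SP as
   the updated base register emits "pop {r4, r5, pc}", while the same pop
   on return from an interrupt emits "ldmfd sp!, {r4, r5, pc}^", the
   trailing '^' also restoring SPSR into CPSR.  */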
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    p += sprintf (&pattern[p], ", d%d", base + i);

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
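/* Usage example (hypothetical operands): storing {d8, d9, d10} with SP as
   the base register emits "vpush.64 {d8, d9, d10}"; with any other base
   register it emits the equivalent "vstmdb.64" with writeback.  */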
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 -(count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
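/* Worked example of the ARM10 VFPr1 workaround above (illustrative): a
   request to push exactly {d8, d9} on a pre-Armv6 core is widened to
   {d8, d9, d10}, and a request for the last two pairs {d14, d15} is
   shifted down to {d13, d14, d15}, so that a two-pair FSTMD is never
   emitted.  */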
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && SYMBOL_REF_P (addr)
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register (NULL_RTX, false /*compute_now*/);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_FDPIC)
    {
      rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
/* Emit a MOVW/MOVT pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
20057 /* Output a move between double words. It must be REG<-MEM
20060 output_move_double (rtx
*operands
, bool emit
, int *count
)
20062 enum rtx_code code0
= GET_CODE (operands
[0]);
20063 enum rtx_code code1
= GET_CODE (operands
[1]);
20068 /* The only case when this might happen is when
20069 you are looking at the length of a DImode instruction
20070 that has an invalid constant in it. */
20071 if (code0
== REG
&& code1
!= MEM
)
20073 gcc_assert (!emit
);
20080 unsigned int reg0
= REGNO (operands
[0]);
20081 const bool can_ldrd
= TARGET_LDRD
&& (TARGET_THUMB2
|| (reg0
% 2 == 0));
20083 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
20085 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
20087 switch (GET_CODE (XEXP (operands
[1], 0)))
20094 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
20095 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
20097 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20102 gcc_assert (can_ldrd
);
20104 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
20111 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
20113 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
20121 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
20123 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
20128 gcc_assert (can_ldrd
);
20130 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
/* Auto-increment addressing modes should never have overlapping
   base and destination registers, and overlapping index registers
   are already prohibited, so this doesn't need to worry about
   fix_cm3_ldrd.  */
20139 otherops
[0] = operands
[0];
20140 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
20141 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
20143 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
20145 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
20147 /* Registers overlap so split out the increment. */
20150 gcc_assert (can_ldrd
);
20151 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
20152 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
20159 /* Use a single insn if we can.
20160 FIXME: IWMMXT allows offsets larger than ldrd can
20161 handle, fix these up with a pair of ldr. */
20164 || !CONST_INT_P (otherops
[2])
20165 || (INTVAL (otherops
[2]) > -256
20166 && INTVAL (otherops
[2]) < 256)))
20169 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
20175 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
20176 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20186 /* Use a single insn if we can.
20187 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20188 fix these up with a pair of ldr. */
20191 || !CONST_INT_P (otherops
[2])
20192 || (INTVAL (otherops
[2]) > -256
20193 && INTVAL (otherops
[2]) < 256)))
20196 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
20202 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20203 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
20213 /* We might be able to use ldrd %0, %1 here. However the range is
20214 different to ldr/adr, and it is broken on some ARMv7-M
20215 implementations. */
20216 /* Use the second register of the pair to avoid problematic
20218 otherops
[1] = operands
[1];
20220 output_asm_insn ("adr%?\t%0, %1", otherops
);
20221 operands
[1] = otherops
[0];
20225 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20227 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
20234 /* ??? This needs checking for thumb2. */
20236 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
20237 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
20239 otherops
[0] = operands
[0];
20240 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
20241 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
20243 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
20245 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20247 switch ((int) INTVAL (otherops
[2]))
20251 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
20257 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
20263 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
20267 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
20268 operands
[1] = otherops
[0];
20270 && (REG_P (otherops
[2])
20272 || (CONST_INT_P (otherops
[2])
20273 && INTVAL (otherops
[2]) > -256
20274 && INTVAL (otherops
[2]) < 256)))
20276 if (reg_overlap_mentioned_p (operands
[0],
20279 /* Swap base and index registers over to
20280 avoid a conflict. */
20281 std::swap (otherops
[1], otherops
[2]);
20283 /* If both registers conflict, it will usually
20284 have been fixed by a splitter. */
20285 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
20286 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
20290 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20291 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20298 otherops
[0] = operands
[0];
20300 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
20305 if (CONST_INT_P (otherops
[2]))
20309 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
20310 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
20312 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20318 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20324 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
20331 return "ldrd%?\t%0, [%1]";
20333 return "ldmia%?\t%1, %M0";
20337 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
20338 /* Take care of overlapping base/data reg. */
20339 if (reg_mentioned_p (operands
[0], operands
[1]))
20343 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20344 output_asm_insn ("ldr%?\t%0, %1", operands
);
20354 output_asm_insn ("ldr%?\t%0, %1", operands
);
20355 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20365 /* Constraints should ensure this. */
20366 gcc_assert (code0
== MEM
&& code1
== REG
);
20367 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
20368 || (TARGET_ARM
&& TARGET_LDRD
));
20370 /* For TARGET_ARM the first source register of an STRD
20371 must be even. This is usually the case for double-word
20372 values but user assembly constraints can force an odd
20373 starting register. */
20374 bool allow_strd
= TARGET_LDRD
20375 && !(TARGET_ARM
&& (REGNO (operands
[1]) & 1) == 1);
20376 switch (GET_CODE (XEXP (operands
[0], 0)))
20382 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
20384 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20389 gcc_assert (allow_strd
);
20391 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
20398 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
20400 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
20408 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
20410 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
20415 gcc_assert (allow_strd
);
20417 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
20422 otherops
[0] = operands
[1];
20423 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
20424 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
20426 /* IWMMXT allows offsets larger than strd can handle,
20427 fix these up with a pair of str. */
20429 && CONST_INT_P (otherops
[2])
20430 && (INTVAL(otherops
[2]) <= -256
20431 || INTVAL(otherops
[2]) >= 256))
20433 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20437 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
20438 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20447 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20448 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
20454 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20457 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
20462 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
20467 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
20468 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20470 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
20474 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
20481 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
20488 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
20493 && (REG_P (otherops
[2])
20495 || (CONST_INT_P (otherops
[2])
20496 && INTVAL (otherops
[2]) > -256
20497 && INTVAL (otherops
[2]) < 256)))
20499 otherops
[0] = operands
[1];
20500 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
20502 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
20508 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
20509 otherops
[1] = operands
[1];
20512 output_asm_insn ("str%?\t%1, %0", operands
);
20513 output_asm_insn ("str%?\t%H1, %0", otherops
);
20523 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20524 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20527 output_move_quad (rtx
*operands
)
20529 if (REG_P (operands
[0]))
20531 /* Load, or reg->reg move. */
20533 if (MEM_P (operands
[1]))
20535 switch (GET_CODE (XEXP (operands
[1], 0)))
20538 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20543 output_asm_insn ("adr%?\t%0, %1", operands
);
20544 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
20548 gcc_unreachable ();
20556 gcc_assert (REG_P (operands
[1]));
20558 dest
= REGNO (operands
[0]);
20559 src
= REGNO (operands
[1]);
/* This seems pretty dumb, but hopefully GCC won't try to do it
   very often.  */
20564 for (i
= 0; i
< 4; i
++)
20566 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20567 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20568 output_asm_insn ("mov%?\t%0, %1", ops
);
20571 for (i
= 3; i
>= 0; i
--)
20573 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20574 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20575 output_asm_insn ("mov%?\t%0, %1", ops
);
20581 gcc_assert (MEM_P (operands
[0]));
20582 gcc_assert (REG_P (operands
[1]));
20583 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
20585 switch (GET_CODE (XEXP (operands
[0], 0)))
20588 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20592 gcc_unreachable ();
20599 /* Output a VFP load or store instruction. */
20602 output_move_vfp (rtx
*operands
)
20604 rtx reg
, mem
, addr
, ops
[2];
20605 int load
= REG_P (operands
[0]);
20606 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
20607 int sp
= (!TARGET_VFP_FP16INST
20608 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
20609 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
20614 reg
= operands
[!load
];
20615 mem
= operands
[load
];
20617 mode
= GET_MODE (reg
);
20619 gcc_assert (REG_P (reg
));
20620 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
20621 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
20627 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
20628 gcc_assert (MEM_P (mem
));
20630 addr
= XEXP (mem
, 0);
20632 switch (GET_CODE (addr
))
20635 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20636 ops
[0] = XEXP (addr
, 0);
20641 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20642 ops
[0] = XEXP (addr
, 0);
20647 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
20653 sprintf (buff
, templ
,
20654 load
? "ld" : "st",
20655 dp
? "64" : sp
? "32" : "16",
20657 integer_p
? "\t%@ int" : "");
20658 output_asm_insn (buff
, ops
);
20663 /* Output a Neon double-word or quad-word load or store, or a load
20664 or store for larger structure modes.
20666 WARNING: The ordering of elements is weird in big-endian mode,
because the EABI requires that vectors stored in memory appear
as though they were stored by a VSTM instruction.
20669 GCC RTL defines element ordering based on in-memory order.
20670 This can be different from the architectural ordering of elements
20671 within a NEON register. The intrinsics defined in arm_neon.h use the
20672 NEON register element ordering, not the GCC RTL element ordering.
For example, the in-memory ordering of a big-endian quadword
20675 vector with 16-bit elements when stored from register pair {d0,d1}
20676 will be (lowest address first, d0[N] is NEON register element N):
20678 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20680 When necessary, quadword registers (dN, dN+1) are moved to ARM
20681 registers from rN in the order:
20683 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20685 So that STM/LDM can be used on vectors in ARM registers, and the
20686 same memory layout will result as if VSTM/VLDM were used.
20688 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20689 possible, which allows use of appropriate alignment tags.
20690 Note that the choice of "64" is independent of the actual vector
20691 element size; this size simply ensures that the behavior is
20692 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20694 Due to limitations of those instructions, use of VST1.64/VLD1.64
20695 is not possible if:
20696 - the address contains PRE_DEC, or
20697 - the mode refers to more than 4 double-word registers
20699 In those cases, it would be possible to replace VSTM/VLDM by a
20700 sequence of instructions; this is not currently implemented since
20701 this is not certain to actually improve performance. */
20704 output_move_neon (rtx
*operands
)
20706 rtx reg
, mem
, addr
, ops
[2];
20707 int regno
, nregs
, load
= REG_P (operands
[0]);
20712 reg
= operands
[!load
];
20713 mem
= operands
[load
];
20715 mode
= GET_MODE (reg
);
20717 gcc_assert (REG_P (reg
));
20718 regno
= REGNO (reg
);
20719 nregs
= REG_NREGS (reg
) / 2;
20720 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
20721 || NEON_REGNO_OK_FOR_QUAD (regno
));
20722 gcc_assert (VALID_NEON_DREG_MODE (mode
)
20723 || VALID_NEON_QREG_MODE (mode
)
20724 || VALID_NEON_STRUCT_MODE (mode
));
20725 gcc_assert (MEM_P (mem
));
20727 addr
= XEXP (mem
, 0);
20729 /* Strip off const from addresses like (const (plus (...))). */
20730 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20731 addr
= XEXP (addr
, 0);
20733 switch (GET_CODE (addr
))
20736 /* We have to use vldm / vstm for too-large modes. */
20737 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20739 templ
= "v%smia%%?\t%%0!, %%h1";
20740 ops
[0] = XEXP (addr
, 0);
20744 templ
= "v%s1.64\t%%h1, %%A0";
20751 /* We have to use vldm / vstm in this case, since there is no
20752 pre-decrement form of the vld1 / vst1 instructions. */
20753 templ
= "v%smdb%%?\t%%0!, %%h1";
20754 ops
[0] = XEXP (addr
, 0);
20759 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20760 gcc_unreachable ();
20763 /* We have to use vldm / vstm for too-large modes. */
20766 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20767 templ
= "v%smia%%?\t%%m0, %%h1";
20769 templ
= "v%s1.64\t%%h1, %%A0";
20775 /* Fall through. */
20777 if (GET_CODE (addr
) == PLUS
)
20778 addr
= XEXP (addr
, 0);
20779 /* Fall through. */
20784 for (i
= 0; i
< nregs
; i
++)
/* We're only using DImode here because it's a convenient
   size.  */
20788 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
20789 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
20790 if (reg_overlap_mentioned_p (ops
[0], mem
))
20792 gcc_assert (overlap
== -1);
20797 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20798 sprintf (buff
, "v%sr.64\t%%P0, %%1", load
? "ld" : "st");
20800 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20801 output_asm_insn (buff
, ops
);
20806 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
20807 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
20808 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20809 sprintf (buff
, "v%sr.32\t%%P0, %%1", load
? "ld" : "st");
20811 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20812 output_asm_insn (buff
, ops
);
20819 gcc_unreachable ();
20822 sprintf (buff
, templ
, load
? "ld" : "st");
20823 output_asm_insn (buff
, ops
);
20828 /* Compute and return the length of neon_mov<mode>, where <mode> is
20829 one of VSTRUCT modes: EI, OI, CI or XI. */
20831 arm_attr_length_move_neon (rtx_insn
*insn
)
20833 rtx reg
, mem
, addr
;
20837 extract_insn_cached (insn
);
20839 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
20841 mode
= GET_MODE (recog_data
.operand
[0]);
20852 gcc_unreachable ();
20856 load
= REG_P (recog_data
.operand
[0]);
20857 reg
= recog_data
.operand
[!load
];
20858 mem
= recog_data
.operand
[load
];
20860 gcc_assert (MEM_P (mem
));
20862 addr
= XEXP (mem
, 0);
20864 /* Strip off const from addresses like (const (plus (...))). */
20865 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20866 addr
= XEXP (addr
, 0);
20868 if (LABEL_REF_P (addr
) || GET_CODE (addr
) == PLUS
)
20870 int insns
= REG_NREGS (reg
) / 2;
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
20881 arm_address_offset_is_imm (rtx_insn
*insn
)
20885 extract_insn_cached (insn
);
20887 if (REG_P (recog_data
.operand
[0]))
20890 mem
= recog_data
.operand
[0];
20892 gcc_assert (MEM_P (mem
));
20894 addr
= XEXP (mem
, 0);
20897 || (GET_CODE (addr
) == PLUS
20898 && REG_P (XEXP (addr
, 0))
20899 && CONST_INT_P (XEXP (addr
, 1))))
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
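/* Worked example (added for exposition): output_multi_immediate splits a
   constant into 8-bit chunks aligned on even bit positions, each of which
   is a valid ARM rotated immediate.  The sketch below (illustration only;
   the helper name is hypothetical) mirrors that loop on the host: for
   n = 0x12345 it prints the chunks 0x45, 0x2300 and 0x10000, matching

       add r0, r1, #0x45
       add r0, r0, #0x2300
       add r0, r0, #0x10000
  */
#if 0 /* Illustration only; not compiled.  */
#include <stdio.h>

static void
split_into_arm_immediates (unsigned int n)
{
  for (int i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
	printf ("chunk: %#x\n", n & (255u << i));
	i += 6;	/* Skip the rest of the 8-bit window just consumed.  */
      }
}
#endif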
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
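
/* For illustration: (mult x 8) is handled as a shift, so *AMOUNTP
   becomes exact_log2 (8) == 3 and the operand prints as "lsl #3",
   while (rotate x 8) is rewritten as the equivalent right rotation
   and prints as "ror #24" (32 - 8).  */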
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.cc.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
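
/* For example, an input string such as (say "hi" followed by a
   newline) would be emitted roughly as

	.ascii	"say \"hi\"\012"

   with the quote characters backslash-escaped and the non-printable
   newline printed as a three-digit octal escape.  */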
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (reg_needs_saving_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  unsigned int reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
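
/* For instance, in the normal (non-interrupt, non-volatile) case a
   function in which only r4 and r7 are live and callee-saved yields a
   mask of (1 << 4) | (1 << 7) == 0x90, with the frame-pointer, PIC and
   EH-return bits added only when those conditions apply.  */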
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	       || flag_stack_clash_protection)
	      && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  if (arm_current_function_pac_enabled_p ())
    save_reg_mask |= 1 << IP_REGNUM;

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes ())) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_VFP_BASE)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
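
/* As a worked example: if d8-d11 need saving, the loop above sees one
   contiguous run with count == 4 and the function returns 4 * 8 == 32
   bytes.  On a pre-v6 core (the ARM10 VFPr1 workaround), a run of
   exactly two registers is padded to three, so a d8-d9 run costs 24
   bytes rather than 16.  */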
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && !IS_CMSE_ENTRY (func_type)
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5t && TARGET_ARM)
		sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		}
	    }
	  /* For interrupt returns we have to use an LDM rather than
	     a POP so that we can use the exception return variant.  */
	  else if (IS_INTERRUPT (func_type))
	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
	  else
	    sprintf (instr, "pop%s\t{", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  gcc_assert (arm_arch5t || arm_arch4t);
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  if (IS_CMSE_ENTRY (func_type))
	    {
	      /* For Armv8.1-M, this is cleared as part of the CLRM instruction
		 emitted by cmse_nonsecure_entry_clear_before_return () and the
		 VSTR/VLDR instructions in the prologue and epilogue.  */
	      if (!TARGET_HAVE_FPCXT_CMSE)
		{
		  /* Check if we have to clear the 'GE bits' which is only used if
		     parallel add and subtraction instructions are available.  */
		  if (TARGET_INT_SIMD)
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
		  else
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvq, %%|lr", conditional);

		  output_asm_insn (instr, & operand);
		  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
		     care of it.  */
		  if (TARGET_HARD_FLOAT)
		    {
		      /* Clear the cumulative exception-status bits (0-4,7) and
			 the condition code bits (28-31) of the FPSCR.  We need
			 to remember to clear the first scratch register used
			 (IP) and save and restore the second (r4).

			 Important note: the length of the
			 thumb2_cmse_entry_return insn pattern must account for
			 the size of the below instructions.  */
		      output_asm_insn ("push\t{%|r4}", & operand);
		      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
		      output_asm_insn ("movw\t%|r4, #65376", & operand);
		      output_asm_insn ("movt\t%|r4, #4095", & operand);
		      output_asm_insn ("and\t%|ip, %|r4", & operand);
		      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
		      output_asm_insn ("pop\t{%|r4}", & operand);
		      output_asm_insn ("mov\t%|ip, %|lr", & operand);
		    }
		}
	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
	    }
	  /* Use bx if it's available.  */
	  else if (arm_arch5t || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
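
/* To illustrate the common cases: a normal ARM-state function that
   saved {r4, lr} and has no interworking or CMSE requirements folds
   the return into the restore, giving roughly

	pop	{r4, pc}

   while an ISR must use the exception-return form that also restores
   the CPSR, e.g.

	ldmfd	sp!, {r0, r1, pc}^

   The precise sequence depends on the live register mask and the
   function type computed above.  */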
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
				    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
	       (HOST_WIDE_INT) crtl->args.size,
	       crtl->args.pretend_args_size,
	       (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	unsigned regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* Initial stack adjustment with store of first pair.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;
}
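
/* Sketch of the emitted sequence: pushing {r4, r5, r6} (an odd count)
   first stores one register with writeback to allocate the whole area,
   then pairs the rest, roughly:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   assuming SP was 64-bit aligned on entry, so that the STRD is too.  */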
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  /* NOTE: The dwarf code emitter handles reg-reg copies correctly; in
	     the following example the reg-reg copy of SP to IP is handled
	     through the .cfi_def_cfa_register directive and the .cfi_offset
	     directive for the IP register is skipped by the dwarf code
	     emitter.
	     Example:
		mov	ip, sp
		.cfi_def_cfa_register 12
		push	{fp, ip, lr, pc}
		.cfi_offset 11, -16
		.cfi_offset 13, -12
		.cfi_offset 14, -8

	     Whereas the Arm-specific .save directive handling is different
	     from that of the dwarf code emitter and doesn't consider reg-reg
	     copies while updating the register list.  When PACBTI is enabled
	     we manually update the .save directive register list to use
	     "ra_auth_code" (pseudo register 143) instead of the IP register,
	     as shown in the following pseudo code.
	     Example:
		pacbti	ip, lr, sp
		.cfi_register 143, 12
		push	{r3, r7, ip, lr}
		.save	{r3, r7, ra_auth_code, lr}  */
	  rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	  if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	    dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 dwarf_reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	  if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	    dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       dwarf_reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
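
/* For example, after an epilogue insn that pops two 4-byte registers,
   calling this with SIZE == 8 and SP for both DEST and SRC attaches a
   note equivalent to (set sp (plus sp 8)), keeping the unwinder's
   notion of the CFA in step with the real stack pointer.  */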
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	  dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
					      NULL_RTX);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
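
/* As an illustration, for SAVED_REGS_MASK covering {r4, r5, pc} the
   parallel built above holds a return, the SP increment by 12, and one
   load per register, which the pop_multi patterns match and emit as
   roughly "pop {r4, r5, pc}".  */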
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers are being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers are
   popped, the last register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
			       plus_constant (Pmode,
					      stack_pointer_rtx, 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped,
	       and we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
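
/* E.g. a function returning a 64-bit long long has a DImode return
   rtx, so this returns GET_MODE_SIZE (DImode) == 8: the value occupies
   the register pair r0/r1.  */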
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */

/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM; regno++)
            if (reg_needs_saving_p (regno))
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_VFP_BASE)
        saved += arm_get_vfp_saved_size ();

      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
         nonsecure entry functions with VSTR/VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
        saved += 4;
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}

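/* Illustrative example (not part of the original source): for an ARM
   function with no pretend args that saves {r4, fp, lr} (12 bytes), has
   8 bytes of locals and no outgoing arguments, the code above computes
   saved_args = 0 and saved_regs = 12; under ARM_DOUBLEWORD_ALIGN the
   soft frame offset (12) is padded to 16 by pushing one extra register,
   giving locals_base = 24 and outgoing_args = 24, which is already
   doubleword aligned.  The exact numbers depend on target options and
   tuning; this only sketches how the fields relate.  */
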
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */

          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}

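/* Worked example (illustrative, not in the original): with
   saved_args = 0 and outgoing_args = 24, eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM above returns
   24 - (0 + 4) = 20, i.e. the incoming argument pointer ends up 20
   bytes above the final stack pointer.  */
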
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}

/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs (void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  if (TARGET_REALLY_IWMMXT)
    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
      if (reg_needs_saving_p (reg))
        {
          insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = gen_rtx_MEM (V2SImode, insn);
          insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
          RTX_FRAME_RELATED_P (insn) = 1;
          saved_size += 8;
        }

  if (TARGET_VFP_BASE)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}

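/* Example of the run grouping above (illustrative): if d8-d11 are the
   only VFP registers needing a save, start_reg settles on s16 and the
   first gap at s24 triggers a single vfp_emit_fstmd (s16's regno, 4),
   i.e. one "vstmdb sp!, {d8-d11}" covering the whole contiguous run
   rather than four separate stores.  */
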
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
                               unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
        if (regno1 != i && (live_regs & (1 << i)) != 0)
          {
            regno = i;
            break;
          }

      if (regno < 0)
        {
          /* If IP is used as the 1st scratch register for a nested function,
             then either r3 wasn't available or is used to preserve IP.  */
          if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
            regno1 = 3;
          regno = (regno1 == 3 ? 2 : 3);
          sr->saved
            = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                               regno);
        }
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                            unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (reg1);
        }

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
        }
      else
        emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          HOST_WIDE_INT rem = size - rounded_size;

          if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
            {
              emit_set_insn (sr.reg,
                             plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
              emit_stack_probe (plus_constant (Pmode, sr.reg,
                                               PROBE_INTERVAL - rem));
            }
          else
            emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
        }

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

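/* Worked example (illustrative, not in the original): with
   PROBE_INTERVAL == 4096, FIRST == 4096 and SIZE == 10000, the middle
   branch above applies: probes are emitted at SP - 8192 and SP - 12288,
   and since the residual 10000 - 8192 = 1808 fits in an ARM-mode
   immediate offset, a final probe lands at SP - 14096, which equals
   SP - (FIRST + SIZE).  */
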
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}

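/* The emitted loop looks like this (illustrative register choice;
   TEST_ADDR in r4, LAST_ADDR in r5, PROBE_INTERVAL == 4096):

       .LPSRL0:
               sub     r4, r4, #4096
               str     r0, [r4, #0]
               cmp     r4, r5
               bne     .LPSRL0
*/
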
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which registers we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = (IS_NESTED (func_type)
                && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
                     || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
                          || flag_stack_clash_protection)
                         && !df_regs_ever_live_p (LR_REGNUM)
                         && arm_r3_live_at_start_p ()))
                    || arm_current_function_pac_enabled_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
          arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
          onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
          push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
        insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
        {
          rtx addr, dwarf;

          saved_regs += 4;

          addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
          fp_offset = 4;

          /* Just tell the dwarf backend that we adjusted SP.  */
          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -fp_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          if (arm_current_function_pac_enabled_p ())
            cfun->machine->pacspval_needed = 1;
        }
      else
        {
          /* Store the args on the stack.  */
          if (cfun->machine->uses_anonymous_args)
            {
              insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                          (0xf0 >> (args_to_push / 4)) & 0xf);
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
              saved_pretend_args = 1;
            }
          else
            {
              rtx addr, dwarf;

              if (args_to_push == 4)
                addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              else
                addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          -args_to_push));

              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }

          RTX_FRAME_RELATED_P (insn) = 1;
          fp_offset = args_to_push;
          args_to_push = 0;
          if (arm_current_function_pac_enabled_p ())
            cfun->machine->pacspval_needed = 1;
        }
    }

  if (arm_current_function_pac_enabled_p ())
    {
      /* If IP was clobbered we only emit a PAC instruction as the BTI
         one will be added before the push of the clobbered IP (if
         necessary) by the bti pass.  */
      if (aarch_bti_enabled () && !clobber_ip)
        insn = emit_insn (gen_pacbti_nop ());
      else
        insn = emit_insn (gen_pac_nop ());

      rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, dwarf);
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
  if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
    {
      saved_regs += 4;
      insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
                                                GEN_INT (FPCXTNS_ENUM)));
      rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx, -4));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when the frame is needed and the frame layout is
     APCS), subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          insn = GEN_INT (saved_regs - (4 + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
          || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
        regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
        regno = LR_REGNUM;
      else
        regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            arm_emit_probe_stack_range (get_stack_check_protect (),
                                        size - get_stack_check_protect (),
                                        regno, live_regs_mask);
        }
      else if (size > 0)
        arm_emit_probe_stack_range (get_stack_check_protect (), size,
                                    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
        insn = gen_rtx_REG (SImode, 3);
      else
        {
          insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
          insn = gen_frame_mem (SImode, insn);
        }
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        emit_insn (gen_stack_tie (stack_pointer_rtx,
                                  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask, NULL_RTX);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}

/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}

/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
   Letters previously used, but now deprecated/obsolete: sWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
   If CODE is 'V', then the operand must be a CONST_INT representing
   the bits to preserve in the modified register (Rd) of a BFI or BFC
   instruction: print out both the width and lsb (shift) fields.  */
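/* Example of the integer codes above (illustrative, not from the
   original source): for the operand (const_int 5), "%B" prints the
   sign-extended bitwise inverse -6, "%x" prints #0x5 and "%L" prints
   the low 16 bits, 5.  */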
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence. It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
        HOST_WIDE_INT val;

        if (!CONST_INT_P (x)
            || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        else
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         values.  */
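      /* Example (illustrative, not from the original source): for a
         DImode value held in the pair {r0, r1} on a little-endian
         target, "%Q" prints r0 (least significant word), "%R" prints
         r1 (most significant word) and "%H" always prints the
         higher-numbered register, r1.  */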
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 'V':
      {
        /* Output the LSB (shift) and width for a bitmask instruction
           based on a literal mask.  The LSB is printed first,
           followed by the width.

           Eg. For 0b1...1110001, the result is #1, #3.  */
        if (!CONST_INT_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        unsigned HOST_WIDE_INT val
          = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
        int lsb = exact_log2 (val & -val);
        asm_fprintf (stream, "#%d, #%d", lsb,
                     (exact_log2 (val + (val & -val)) - lsb));
      }
      return;

    case 's':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        machine_mode mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        rtx postinc_reg = NULL;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        if (GET_CODE (addr) == POST_MODIFY)
          {
            postinc_reg = XEXP( XEXP (addr, 1), 1);
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
        if (postinc_reg)
          asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;
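      /* Example of the alignment hint above (illustrative): a 16-byte
         vld1 access known to be 16-byte aligned is printed as
         "[r0:128]", and a 32-byte access aligned to 32 bytes as
         "[r0:256]"; accesses without a suitable alignment guarantee
         get no ":<bits>" suffix.  */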
    /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
       rtx_code the memory operands output looks like following.
       1.  [Rn], #+/-<imm>
       2.  [Rn, #+/-<imm>]!
       3.  [Rn, #+/-<imm>]
       4.  [Rn].  */
    case 'E':
      {
        rtx addr;
        rtx postinc_reg = NULL;
        unsigned inc_val = 0;
        enum rtx_code code;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        code = GET_CODE (addr);
        if (code == POST_INC || code == POST_DEC || code == PRE_INC
            || code == PRE_DEC)
          {
            asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
            inc_val = GET_MODE_SIZE (GET_MODE (x));
            if (code == POST_INC || code == POST_DEC)
              asm_fprintf (stream, "], #%s%d",(code == POST_INC)
                                              ? "": "-", inc_val);
            else
              asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
                                               ? "": "-", inc_val);
          }
        else if (code == POST_MODIFY || code == PRE_MODIFY)
          {
            asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
            postinc_reg = XEXP (XEXP (addr, 1), 1);
            if (postinc_reg && CONST_INT_P (postinc_reg))
              {
                if (code == POST_MODIFY)
                  asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
                else
                  asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
              }
          }
        else if (code == PLUS)
          {
            rtx base = XEXP (addr, 0);
            rtx index = XEXP (addr, 1);

            gcc_assert (REG_P (base) && CONST_INT_P (index));

            HOST_WIDE_INT offset = INTVAL (index);
            asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
          }
        else
          {
            gcc_assert (REG_P (addr));
            asm_fprintf (stream, "[%r]",REGNO (addr));
          }
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
        gcc_assert (CONST_DOUBLE_P (x));
        int result;
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_address (GET_MODE (x), XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}

/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (!REG_P (base)
              || (REG_P (index) && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  */
              /* (one of them must be).  */
              /* Also ensure the SP is not used as in index register.  */
              std::swap (base, index);
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              asm_fprintf (stream, "[%r, %s%r",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (XEXP (index, 0)));
              arm_print_operand (stream, index, 'S');
              fputs ("]", stream);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          gcc_assert (REG_P (XEXP (x, 0)));

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
          else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
            asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
          else
            asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (REG_P (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (x, 1)))
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}

/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}

/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          /* References to weak symbols cannot be resolved locally:
             they may be overridden by a non-weak definition at link
             time.  */
          if (!arm_pic_data_is_text_relative
              || (SYMBOL_REF_P (x)
                  && (!SYMBOL_REF_LOCAL_P (x)
                      || (SYMBOL_REF_DECL (x)
                          ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
                      || (SYMBOL_REF_FUNCTION_P (x)
                          && !arm_fdpic_local_funcdesc_p (x)))))
            {
              if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
                fputs ("(GOTFUNCDESC)", asm_out_file);
              else
                fputs ("(GOT)", asm_out_file);
            }
          else
            {
              if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
                fputs ("(GOTOFFFUNCDESC)", asm_out_file);
              else
                {
                  bool is_readonly;

                  if (!TARGET_FDPIC
                      || arm_is_segment_info_known (x, &is_readonly))
                    fputs ("(GOTOFF)", asm_out_file);
                  else
                    fputs ("(GOT)", asm_out_file);
                }
            }
        }

      /* For FDPIC we also have to mark symbol for .data section.  */
      if (TARGET_FDPIC
          && !making_const_table
          && SYMBOL_REF_P (x)
          && SYMBOL_REF_FUNCTION_P (x))
        fputs ("(FUNCDESC)", asm_out_file);

      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_real
              (*CONST_DOUBLE_REAL_VALUE (elt),
               as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}

static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}

/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
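/* For example (illustrative, not from the original source), a
   branch-over sequence such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
   .L1:

   can be rewritten by this fsm as

        cmp     r0, #0
        addne   r1, r1, #1

   saving the branch.  */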
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
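/* For example (illustrative): (geu (reg:CC CC_REGNUM) (const_int 0))
   in plain CCmode maps to ARM_CS below, while the same comparison in
   CC_SWPmode (swapped operands) maps to ARM_LS.  */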
25113 maybe_get_arm_condition_code (rtx comparison
)
25115 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
25116 enum arm_cond_code code
;
25117 enum rtx_code comp_code
= GET_CODE (comparison
);
25119 if (GET_MODE_CLASS (mode
) != MODE_CC
)
25120 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
25121 XEXP (comparison
, 1));
25125 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
25126 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
25127 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
25128 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
25129 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
25130 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
25131 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
25132 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
25133 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
25134 case E_CC_DLTUmode
: code
= ARM_CC
;
25137 if (comp_code
== EQ
)
25138 return ARM_INVERSE_CONDITION_CODE (code
);
25139 if (comp_code
== NE
)
25146 case NE
: return ARM_NE
;
25147 case EQ
: return ARM_EQ
;
25148 case GE
: return ARM_PL
;
25149 case LT
: return ARM_MI
;
25150 default: return ARM_NV
;
25156 case NE
: return ARM_NE
;
25157 case EQ
: return ARM_EQ
;
25158 default: return ARM_NV
;
25164 case NE
: return ARM_MI
;
25165 case EQ
: return ARM_PL
;
25166 default: return ARM_NV
;
25171 /* We can handle all cases except UNEQ and LTGT. */
25174 case GE
: return ARM_GE
;
25175 case GT
: return ARM_GT
;
25176 case LE
: return ARM_LS
;
25177 case LT
: return ARM_MI
;
25178 case NE
: return ARM_NE
;
25179 case EQ
: return ARM_EQ
;
25180 case ORDERED
: return ARM_VC
;
25181 case UNORDERED
: return ARM_VS
;
25182 case UNLT
: return ARM_LT
;
25183 case UNLE
: return ARM_LE
;
25184 case UNGT
: return ARM_HI
;
25185 case UNGE
: return ARM_PL
;
25186 /* UNEQ and LTGT do not have a representation. */
25187 case UNEQ
: /* Fall through. */
25188 case LTGT
: /* Fall through. */
25189 default: return ARM_NV
;
25195 case NE
: return ARM_NE
;
25196 case EQ
: return ARM_EQ
;
25197 case GE
: return ARM_LE
;
25198 case GT
: return ARM_LT
;
25199 case LE
: return ARM_GE
;
25200 case LT
: return ARM_GT
;
25201 case GEU
: return ARM_LS
;
25202 case GTU
: return ARM_CC
;
25203 case LEU
: return ARM_CS
;
25204 case LTU
: return ARM_HI
;
25205 default: return ARM_NV
;
25211 case LTU
: return ARM_CS
;
25212 case GEU
: return ARM_CC
;
25213 default: return ARM_NV
;
25219 case GE
: return ARM_GE
;
25220 case LT
: return ARM_LT
;
25221 default: return ARM_NV
;
25227 case GEU
: return ARM_CS
;
25228 case LTU
: return ARM_CC
;
25229 default: return ARM_NV
;
25235 case NE
: return ARM_VS
;
25236 case EQ
: return ARM_VC
;
25237 default: return ARM_NV
;
25243 case GEU
: return ARM_CS
;
25244 case LTU
: return ARM_CC
;
25245 default: return ARM_NV
;
25252 case NE
: return ARM_NE
;
25253 case EQ
: return ARM_EQ
;
25254 case GE
: return ARM_GE
;
25255 case GT
: return ARM_GT
;
25256 case LE
: return ARM_LE
;
25257 case LT
: return ARM_LT
;
25258 case GEU
: return ARM_CS
;
25259 case GTU
: return ARM_HI
;
25260 case LEU
: return ARM_LS
;
25261 case LTU
: return ARM_CC
;
25262 default: return ARM_NV
;
25265 default: gcc_unreachable ();
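/* Example of the CC_SWPmode mapping above (illustrative): when the
   operands of a comparison were swapped at the time the compare insn
   was emitted, e.g. "cmp r1, r0" standing in for r0 <op> r1, a GE test
   on the original operands must be evaluated as LE on the flags.  That
   is why the CC_SWPmode entries return the swapped condition rather
   than the inverse one.  */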
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  while (1)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.cc assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5t)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.cc assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (IS_VPR_REGNUM (regno))
    return CEIL (GET_MODE_SIZE (mode), 2);

  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
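/* For example (illustrative): DImode is 8 bytes, so held in the core
   registers it needs ARM_NUM_REGS (DImode) == 2 of them, while any mode
   placed in one of the special 32-bit-only registers matched by the
   TARGET_32BIT test above occupies exactly one.  */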
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_VFP_BASE
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (IS_VPR_REGNUM (regno))
    return VALID_MVE_PRED_MODE (mode);

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
    {
      if (mode == DFmode || mode == DImode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode || mode == BFmode || mode == HImode
	  || mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
      if (TARGET_HAVE_MVE)
	return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
		|| (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  The same restriction applies for MVE
     in order to support Armv8.1-M Mainline instructions.
     Do not allow very large Neon structure opaque modes in general
     registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
	return true;

      return !((TARGET_LDRD || TARGET_CDE)
	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  */
static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  if (TARGET_HAVE_MVE
      && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if ((TARGET_NEON
       && (VALID_NEON_DREG_MODE (mode1)
	   || VALID_NEON_QREG_MODE (mode1)
	   || VALID_NEON_STRUCT_MODE (mode1))
       && (VALID_NEON_DREG_MODE (mode2)
	   || VALID_NEON_QREG_MODE (mode2)
	   || VALID_NEON_STRUCT_MODE (mode2)))
      || (TARGET_HAVE_MVE
	  && (VALID_MVE_MODE (mode1)
	      || VALID_MVE_STRUCT_MODE (mode1))
	  && (VALID_MVE_MODE (mode2)
	      || VALID_MVE_STRUCT_MODE (mode2))))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (IS_VPR_REGNUM (regno))
    return VPR_REG;

  if (IS_PAC_REGNUM (regno))
    return PAC_REG;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type:
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of the _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type.

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
      case EXCESS_PRECISION_TYPE_FLOAT16:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
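/* Concrete effect (illustrative): compiling "_Float16 a, b; ... a * b"
   without the ARMv8.2-A FP16 instructions promotes both operands to
   float, multiplies in single precision and truncates the result back
   (FLT_EVAL_METHOD 0); with native FP16 support the multiply is done
   directly in half precision (FLT_EVAL_METHOD 16).  */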
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
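/* Worked example (illustrative): for COUNT == 2 with DEST == {d1, d2}
   and SRC == {d0, d1}, the ranges overlap and REGNO (operands[0]) >
   REGNO (operands[1]), so the copies are emitted in reverse order:
   d2 := d1 first, then d1 := d0.  Emitting them forwards would read d1
   after it had already been overwritten.  */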
/* Split operands into moves from op[1] + op[2] into op[0].  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
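/* Worked example (illustrative): for MASK == REAL_REGS ==
   (1 << 4) | (1 << 5) | (1 << LR_REGNUM) this emits "push {r4, r5, lr}";
   the insn pre-decrements SP by 12 and the attached
   REG_FRAME_RELATED_EXPR note tells the unwinder that r4 lives at
   [sp, #0], r5 at [sp, #4] and lr at [sp, #8].  */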
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
	  if (!TARGET_HAVE_FPCXT_CMSE)
	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
			 reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    --pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register for the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in cmse_nonsecure_entry_clear_before_return ().  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
	 by cmse_nonsecure_entry_clear_before_return ().  */
      if (!TARGET_HAVE_FPCXT_CMSE)
	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}

      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NZmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
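/* Numeric example of the heuristic (illustrative): a function whose
   insns sum to 700 bytes gives 700 * 3 == 2100 >= 2048, so far jumps
   are assumed; at 600 bytes (1800 < 2048) even the worst case of one
   4-byte literal pool entry per 2-byte insn stays within the +/-2KB
   branch range.  */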
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
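/* Rationale, with numbers (illustrative): the Thumb-1 "sub sp, #imm"
   encoding only reaches 508 (127 words).  For amount == 512, returning
   (512 - 508) / 4 == 1 pushes one extra register, leaving exactly 508
   bytes to subtract in a single instruction instead of two.  */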
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		       next_hi_reg--)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;
  machine->pacspval_needed = 0;

  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
27099 /* Generate the function's prologue. */
27102 thumb1_expand_prologue (void)
27106 HOST_WIDE_INT amount
;
27107 HOST_WIDE_INT size
;
27108 arm_stack_offsets
*offsets
;
27109 unsigned long func_type
;
27111 unsigned long live_regs_mask
;
27112 unsigned long l_mask
;
27113 unsigned high_regs_pushed
= 0;
27114 bool lr_needs_saving
;
27116 func_type
= arm_current_func_type ();
27118 /* Naked functions don't have prologues. */
27119 if (IS_NAKED (func_type
))
27121 if (flag_stack_usage_info
)
27122 current_function_static_stack_size
= 0;
27126 if (IS_INTERRUPT (func_type
))
27128 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27132 if (is_called_in_ARM_mode (current_function_decl
))
27133 emit_insn (gen_prologue_thumb1_interwork ());
27135 offsets
= arm_get_frame_offsets ();
27136 live_regs_mask
= offsets
->saved_regs_mask
;
27137 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
27139 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27140 l_mask
= live_regs_mask
& 0x40ff;
27141 /* Then count how many other high registers will need to be pushed. */
27142 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27144 if (crtl
->args
.pretend_args_size
)
27146 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27148 if (cfun
->machine
->uses_anonymous_args
)
27150 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27151 unsigned long mask
;
27153 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27154 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27156 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27160 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27161 stack_pointer_rtx
, x
));
27163 RTX_FRAME_RELATED_P (insn
) = 1;
27166 if (TARGET_BACKTRACE
)
27168 HOST_WIDE_INT offset
= 0;
27169 unsigned work_register
;
27170 rtx work_reg
, x
, arm_hfp_rtx
;
27172 /* We have been asked to create a stack backtrace structure.
27173 The code looks like this:
27177 0 sub SP, #16 Reserve space for 4 registers.
27178 2 push {R7} Push low registers.
27179 4 add R7, SP, #20 Get the stack pointer before the push.
27180 6 str R7, [SP, #8] Store the stack pointer
27181 (before reserving the space).
27182 8 mov R7, PC Get hold of the start of this code + 12.
27183 10 str R7, [SP, #16] Store it.
27184 12 mov R7, FP Get hold of the current frame pointer.
27185 14 str R7, [SP, #4] Store it.
27186 16 mov R7, LR Get hold of the current return address.
27187 18 str R7, [SP, #12] Store it.
27188 20 add R7, SP, #16 Point at the start of the
27189 backtrace structure.
27190 22 mov FP, R7 Put this value into the frame pointer. */
      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
        {
          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
          lr_needs_saving = false;

          offset = bit_count (l_mask) * UNITS_PER_WORD;
        }

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
         prevent the scheduler from doing anything weird.  Failing that
         we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
        {
          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }
      else
        {
          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
                                                 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      /* Here we need to mask out registers used for passing arguments
         even if they can be pushed.  This is to avoid using them to
         stash the high registers.  Such kind of stash may clobber the
         use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
         saved; but if the function examines its own return address then
         the value is still live and we need to avoid using it.  */
      bool return_addr_live
        = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                           LR_REGNUM);
      if (lr_needs_saving || return_addr_live)
        pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
        {
          unsigned long real_regs_mask = 0;
          unsigned long push_mask = 0;

          for (regno = LR_REGNUM; regno >= 0; regno--)
            {
              if (pushable_regs & (1 << regno))
                {
                  emit_move_insn (gen_rtx_REG (SImode, regno),
                                  gen_rtx_REG (SImode, next_hi_reg));

                  high_regs_pushed--;
                  real_regs_mask |= (1 << next_hi_reg);
                  push_mask |= (1 << regno);

                  if (high_regs_pushed)
                    {
                      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                           next_hi_reg--)
                        if (live_regs_mask & (1 << next_hi_reg))
                          break;
                    }
                  else
                    break;
                }
            }

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (lr_needs_saving)
            {
              push_mask |= 1 << LR_REGNUM;
              real_regs_mask |= 1 << LR_REGNUM;
              lr_needs_saving = false;
              /* If the return address is not live at this point, we
                 can add LR to the list of registers that we can use
                 for pushes.  */
              if (!return_addr_live)
                pushable_regs |= 1 << LR_REGNUM;
            }

          insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask, NULL_RTX);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("%<-fstack-check=specific%> for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          rtx reg, dwarf;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function).  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))
              break;

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));

          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -amount));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
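          /* For illustration only (register choice and label name are
             hypothetical): with amount == 4096 and r4 live, the sequence
             built above is output roughly as

                ldr   r4, .Lnnn     @ .Lnnn: .word -4096
                add   sp, sp, r4

             with the REG_FRAME_RELATED_EXPR note describing the net sp
             adjustment for the unwinder, since the add itself does not
             use an immediate the unwinder could read back.  */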
        }
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
  int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (clear_vfpregs)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      if (!TARGET_HAVE_FPCXT_CMSE)
        {
          /* Make sure we don't clear the two scratch registers used to clear
             the relevant FPSCR bits in output_return_instruction.  */
          emit_use (gen_rtx_REG (SImode, IP_REGNUM));
          bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
          emit_use (gen_rtx_REG (SImode, 4));
          bitmap_clear_bit (to_clear_bitmap, 4);
        }
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
         the AAPCS, since these should never be made callee-saved by user
         options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
        continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
        continue;
      if (!callee_saved_reg_p (regno)
          && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
              || TARGET_HARD_FLOAT))
        bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
         support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
        = compute_not_to_clear_mask (result_type, result_rtl, 0,
                                     &padding_bits_to_clear);
      if (to_clear_return_mask)
        {
          gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
          for (regno = R0_REGNUM; regno <= maxregno; regno++)
            if (to_clear_return_mask & (1ULL << regno))
              bitmap_clear_bit (to_clear_bitmap, regno);
        }
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
         returning a composite type, which only uses r0.  Let's make sure that
         r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
                        clearing_reg);
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
         functions or adapt code to handle according to ACLE.  This path should
         not be reachable for cmse_nonsecure_entry functions though we prefer
         to assert it for now to ensure that future code changes do not silently
         change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (arm_current_function_pac_enabled_p ())
        {
          gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
          arm_emit_multi_reg_pop (saved_regs_mask);
          emit_insn (gen_aut_nop ());
          emit_jump_insn (simple_return_rtx);
        }
      else if (num_regs == 1)
        {
          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
          rtx addr = gen_rtx_MEM (SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
          emit_jump_insn (par);
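          /* For illustration only: the PARALLEL built above pairs a
             return with a post-incremented load of the PC, so it is
             output as a single instruction, roughly

                ldr   pc, [sp], #4

             which restores the return address and pops its slot in one
             step.  */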
        }
      else
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          arm_emit_multi_reg_pop (saved_regs_mask);
        }
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
        cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (reg_needs_saving_p (regno))
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
        num_regs++;
        floats_from_frame += 4;
      }

  if (TARGET_VFP_BASE)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
        {
          rtx_insn *insn;
          floats_from_frame += saved_size;
          insn = emit_insn (gen_addsi3 (ip_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (-floats_from_frame)));
          arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
                                       ip_rtx, hard_frame_pointer_rtx);
        }

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
        /* Look for a case where a reg does not need restoring.  */
        if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
          {
            if (start_reg != i)
              arm_emit_vfp_multi_reg_pop (start_reg,
                                          (i - start_reg) / 2,
                                          gen_rtx_REG (SImode,
                                                       IP_REGNUM));
            start_reg = i + 2;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (start_reg != i)
        arm_emit_vfp_multi_reg_pop (start_reg,
                                    (i - start_reg) / 2,
                                    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
         it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
        if (reg_needs_saving_p (i))
          {
            rtx addr = gen_frame_mem (V2SImode,
                                      plus_constant (Pmode, hard_frame_pointer_rtx,
                                                     - lrm_count * 4));
            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                               gen_rtx_REG (V2SImode, i),
                                               NULL_RTX);
            lrm_count += 2;
          }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    hard_frame_pointer_rtx,
                                    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
                                   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
         IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
                              gen_rtx_POST_INC (SImode,
                                                stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                         gen_rtx_REG (SImode, IP_REGNUM),
                                         NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
        emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
        {
          /* In ARM mode, frame pointer points to first saved register.
             Restore stack pointer to last saved register.  */
          amount = offsets->frame - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (insn, amount,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);

          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
      else
        {
          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
          amount = offsets->locals_base - offsets->saved_regs;
          if (amount)
            {
              insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                            hard_frame_pointer_rtx,
                                            GEN_INT (amount)));
              arm_add_cfa_adjust_cfa_note (insn, amount,
                                           hard_frame_pointer_rtx,
                                           hard_frame_pointer_rtx);
            }

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_movsi (stack_pointer_rtx,
                                       hard_frame_pointer_rtx));
          arm_add_cfa_adjust_cfa_note (insn, 0,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
         last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
        {
          rtx_insn *tmp;
          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx,
                                       GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (tmp, amount,
                                       stack_pointer_rtx, stack_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }

  if (TARGET_VFP_BASE)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
         any groupings made in the prologue and generate matching
         vldm operations.  The need to match groups is because,
         unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
        /* Look for a case where a reg does not need restoring.  */
        if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
          {
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
            if (end_reg > i + 2)
              arm_emit_vfp_multi_reg_pop (i + 2,
                                          (end_reg - (i + 2)) / 2,
                                          stack_pointer_rtx);
            end_reg = i;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (end_reg > i + 2)
        arm_emit_vfp_multi_reg_pop (i + 2,
                                    (end_reg - (i + 2)) / 2,
                                    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (reg_needs_saving_p (i))
        {
          rtx_insn *insn;
          rtx addr = gen_rtx_MEM (V2SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                             gen_rtx_REG (V2SImode, i),
                                             NULL_RTX);
          arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_CMSE_ENTRY (func_type)
          && !IS_STACKALIGN (func_type)
          && really_return
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return
          && !arm_current_function_pac_enabled_p ())
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          return_in_pc = true;
        }

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
        {
          for (i = 0; i <= LAST_ARM_REGNUM; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx addr = gen_rtx_MEM (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx));
                set_mem_alias_set (addr, get_frame_alias_set ());

                if (i == PC_REGNUM)
                  {
                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
                    XVECEXP (insn, 0, 0) = ret_rtx;
                    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
                                                        addr);
                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
                    insn = emit_jump_insn (insn);
                  }
                else
                  {
                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                                                 addr));
                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                       gen_rtx_REG (SImode, i),
                                                       NULL_RTX);
                    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                                 stack_pointer_rtx,
                                                 stack_pointer_rtx);
                  }
              }
        }
      else
        {
          if (TARGET_LDRD
              && current_tune->prefer_ldrd_strd
              && !optimize_function_for_size_p (cfun))
            {
              if (TARGET_THUMB2)
                thumb2_emit_ldrd_pop (saved_regs_mask);
              else if (TARGET_ARM && !IS_INTERRUPT (func_type))
                arm_emit_ldrd_pop (saved_regs_mask);
              else
                arm_emit_multi_reg_pop (saved_regs_mask);
            }
          else
            arm_emit_multi_reg_pop (saved_regs_mask);
        }

      if (return_in_pc)
        return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
  if (amount)
    {
      int j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
        = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                 stack_pointer_rtx,
                                 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
        {
          /* Restore pretend args.  Refer arm_expand_prologue on how to save
             pretend_args in stack.  */
          int num_regs = crtl->args.pretend_args_size / 4;
          saved_regs_mask = (0xf0 >> num_regs) & 0xf;
          for (j = 0, i = 0; j < num_regs; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx reg = gen_rtx_REG (SImode, i);
                dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
                j++;
              }
          REG_NOTES (tmp) = dwarf;
        }
      arm_add_cfa_adjust_cfa_note (tmp, amount,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (IS_CMSE_ENTRY (func_type))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      /* Clear all caller-saved regs that are not used to return.  */
      cmse_nonsecure_entry_clear_before_return ();

      /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
         VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE)
        {
          rtx_insn *insn;

          insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
                                                   GEN_INT (FPCXTNS_ENUM)));
          rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx, 4));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (arm_current_function_pac_enabled_p ())
    emit_insn (gen_aut_nop ());

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
              == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
        {
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
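/* For illustration only (operand choice is hypothetical): with
   operands[0] = r0 and the address r1 + r2, the REG + REG branch of
   thumb_load_double_from_address above emits

        add   r1, r1, r2     @ into reg_dest + 1, i.e. r1
        ldr   r0, [r1, #0]
        ldr   r1, [r1, #4]

   loading the high word last, so the register that doubles as the
   computed address and the high destination is overwritten only by
   its own final use.  */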
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
        std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
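      /* The three conditional swaps above form a tiny sorting network:
         afterwards operands[4..6] are in ascending register order, as
         the register lists of ldmia/stmia require.  E.g. (r5, r2, r4)
         becomes (r2, r4, r5).  */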
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_cpymemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_cpymem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_cpymem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.cc needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
               current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_size:\t%d\n",
               current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_line_size:\t%d\n",
               current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_constant_pool:\t%d\n",
               (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
               current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
               current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
               current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
               current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_ldrd_strd:\t%d\n",
               (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "logical_op_non_short_circuit:\t[%d,%d]\n",
               (int) current_tune->logical_op_non_short_circuit_thumb,
               (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "disparage_flag_setting_t16_encodings:\t%d\n",
               (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "string_ops_prefer_neon:\t%d\n",
               (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_inline_memset:\t%d\n",
               current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
               current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
               (int) current_tune->sched_autopref);
}
/* The last set of target options used to emit .arch directives, etc.  This
   could be a function-local static if it were not required to expose it as a
   root to the garbage collector.  */
static GTY(()) cl_target_option *last_asm_targ_options = NULL;

/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
{
  arm_build_target build_target;
  /* If the target options haven't changed since the last time we were called
     there is nothing to do.  This should be sufficient to suppress the
     majority of redundant work.  */
  if (last_asm_targ_options == targ_options)
    return;

  last_asm_targ_options = targ_options;

  build_target.isa = sbitmap_alloc (isa_num_bits);
  arm_configure_build_target (&build_target, targ_options, false);

  if (build_target.core_name
      && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
    {
      const char* truncated_name
        = arm_rewrite_selected_cpu (build_target.core_name);
      asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
    }

  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
                                  build_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  if (strcmp (build_target.arch_name, "armv7ve") == 0)
    {
      /* Keep backward compatibility for assemblers which don't support
         armv7ve.  Fortunately, none of the following extensions are reset
         by a .fpu directive.  */
      asm_fprintf (stream, "\t.arch armv7-a\n");
      asm_fprintf (stream, "\t.arch_extension virt\n");
      asm_fprintf (stream, "\t.arch_extension idiv\n");
      asm_fprintf (stream, "\t.arch_extension sec\n");
      asm_fprintf (stream, "\t.arch_extension mp\n");
    }
  else
    asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);

  /* The .fpu directive will reset any architecture extensions from the
     assembler that relate to the fp/vector extensions.  So put this out before
     any .arch_extension directives.  */
  const char *fpu_name = (TARGET_SOFT_FLOAT
                          ? "softvfp"
                          : arm_identify_fpu_from_isa (build_target.isa));
  asm_fprintf (stream, "\t.fpu %s\n", fpu_name);

  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
        {
          arm_initialize_isa (opt_bits, opt->isa_bits);

          /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
             "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
             floating point instructions is disabled.  So the following check
             restricts the printing of ".arch_extension mve" and
             ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
             this special behaviour because the feature bit "mve" and
             "mve_float" are not part of "fpu bits", so they are not cleared
             when -mfloat-abi=soft (i.e nofp) but the macros TARGET_HAVE_MVE and
             TARGET_HAVE_MVE_FLOAT are disabled.  */
          if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
              || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
                  && !TARGET_HAVE_MVE_FLOAT))
            continue;

          /* If every feature bit of this option is set in the target ISA
             specification, print out the option name.  However, don't print
             anything if all the bits are part of the FPU specification.  */
          if (bitmap_subset_p (opt_bits, build_target.isa)
              && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
            asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
        }
    }
}
static void
arm_file_start (void)
{
  int val;
  bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
  bool bti = (aarch_enable_bti == 1);

  arm_print_asm_arch_directives
    (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));

  if (TARGET_BPABI)
    {
      /* If we have a named cpu, but the assembler does not support that
         name via .cpu, put out a cpu name attribute; but don't do this if the
         name starts with the fictitious prefix, 'generic'.  */
      if (arm_active_target.core_name
          && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
          && !startswith (arm_active_target.core_name, "generic"))
        {
          const char* truncated_name
            = arm_rewrite_selected_cpu (arm_active_target.core_name);
          if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
            asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
                         truncated_name);
        }

      if (print_tune_info)
        arm_print_tune_info ();

      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
        arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

      if (TARGET_HARD_FLOAT_ABI)
        arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);

      /* Some of these attributes only apply when the corresponding features
         are used.  However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
        arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
        {
          arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
          arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
        }
      if (flag_signaling_nans)
        arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
                               flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
                               flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
                               unaligned_access);

      if (arm_fp16_format)
        arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
                                 (int) arm_fp16_format);

      if (TARGET_HAVE_PACBTI)
        {
          arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
          arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
        }
      else if (pac || bti)
        {
          arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
          arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
        }

      if (bti)
        arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
      if (pac)
        arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
    }

  if (arm_lang_output_object_attributes_hook)
    arm_lang_output_object_attributes_hook ();

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  /* Just in case the last function output in the assembler had non-default
     architecture directives, we force the assembler state back to the default
     set, so that any 'calculated' build attributes are based on the default
     options rather than the special options for that function.  */
  arm_print_asm_arch_directives
    (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */

static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Define classes to generate code as RTL or output asm to a file.
   Using templates then allows to use the same code to output code
   sequences in the two formats.  */
class thumb1_const_rtl
{
 public:
  thumb1_const_rtl (rtx dst) : dst (dst) {}

  void mov (HOST_WIDE_INT val)
  {
    emit_set_insn (dst, GEN_INT (val));
  }

  void add (HOST_WIDE_INT val)
  {
    emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
  }

  void ashift (HOST_WIDE_INT shift)
  {
    emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
  }

  void neg ()
  {
    emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
  }

 private:
  rtx dst;
};

class thumb1_const_print
{
 public:
  thumb1_const_print (FILE *f, int regno)
  {
    t_file = f;
    dst_regname = reg_names[regno];
  }

  void mov (HOST_WIDE_INT val)
  {
    asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
                 dst_regname, val);
  }

  void add (HOST_WIDE_INT val)
  {
    asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
                 dst_regname, val);
  }

  void ashift (HOST_WIDE_INT shift)
  {
    asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
                 dst_regname, shift);
  }

  void neg ()
  {
    asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
  }

 private:
  FILE *t_file;
  const char *dst_regname;
};
/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
   Avoid generating useless code when one of the bytes is zero.  */
template <class T>
static void
thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
{
  bool mov_done_p = false;
  unsigned HOST_WIDE_INT val = op1;
  int shift = 0;
  int i;

  gcc_assert (op1 == trunc_int_for_mode (op1, SImode));

  if (val <= 255)
    {
      dst.mov (val);
      return;
    }

  /* For negative numbers with the first nine bits set, build the
     opposite of OP1, then negate it, it's generally shorter and not
     longer.  */
  if ((val & 0xFF800000) == 0xFF800000)
    {
      thumb1_gen_const_int_1 (dst, -op1);
      dst.neg ();
      return;
    }

  /* In the general case, we need 7 instructions to build
     a 32 bits constant (1 movs, 3 lsls, 3 adds).  We can
     do better if VAL is small enough, or
     right-shiftable by a suitable amount.  If the
     right-shift enables to encode at least one less byte,
     it's worth it: we save a adds and a lsls at the
     expense of a final lsls.  */
  int final_shift = number_of_first_bit_set (val);

  int leading_zeroes = clz_hwi (val);
  int number_of_bytes_needed
    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
       / BITS_PER_UNIT) + 1;
  int number_of_bytes_needed2
    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
       / BITS_PER_UNIT) + 1;

  if (number_of_bytes_needed2 < number_of_bytes_needed)
    val >>= final_shift;
  else
    final_shift = 0;

  /* If we are in a very small range, we can use either a single movs
     or movs+adds.  */
  if (val <= 510)
    {
      if (val > 255)
        {
          unsigned HOST_WIDE_INT high = val - 255;

          dst.mov (high);
          dst.add (255);
        }
      else
        dst.mov (val);

      if (final_shift > 0)
        dst.ashift (final_shift);
    }
  else
    {
      /* General case, emit upper 3 bytes as needed.  */
      for (i = 0; i < 3; i++)
        {
          unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;

          if (byte)
            {
              /* We are about to emit new bits, stop accumulating a
                 shift amount, and left-shift only if we have already
                 emitted some upper bits.  */
              if (mov_done_p)
                {
                  dst.ashift (shift);
                  dst.add (byte);
                }
              else
                dst.mov (byte);

              /* Stop accumulating shift amount since we've just
                 emitted some bits.  */
              shift = 0;

              mov_done_p = true;
            }

          if (mov_done_p)
            shift += 8;
        }

      /* Emit lower byte.  */
      if (!mov_done_p)
        dst.mov (val & 0xff);
      else
        {
          dst.ashift (shift);
          if (val & 0xff)
            dst.add (val & 0xff);
        }

      if (final_shift > 0)
        dst.ashift (final_shift);
    }
}
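/* For illustration only (rN is a placeholder register): for
   op1 = 0x12345678 the general case above emits the 7-instruction
   worst case

        movs    rN, #0x12
        lsls    rN, #8
        adds    rN, #0x34
        lsls    rN, #8
        adds    rN, #0x56
        lsls    rN, #8
        adds    rN, #0x78

   while op1 = 0x00340000 is recognised as right-shiftable and
   collapses to just "movs rN, #13 ; lsls rN, #18".  */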
/* Proxies for thumb1.md, since the thumb1_const_print and
   thumb1_const_rtl classes are not exported.  */
void
thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
{
  thumb1_const_rtl t (dst);
  thumb1_gen_const_int_1 (t, op1);
}

void
thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
{
  thumb1_const_print t (asm_out_file, REGNO (dst));
  thumb1_gen_const_int_1 (t, op1);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
                    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);

          /* With -mpure-code, we cannot load the address from the
             constant pool: we build it explicitly.  */
          if (target_pure_code)
            {
              fputs ("\tmovs\tr3, #:upper8_15:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
              fputs ("\tlsls r3, #8\n", file);
              fputs ("\tadds\tr3, #:upper0_7:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
              fputs ("\tlsls r3, #8\n", file);
              fputs ("\tadds\tr3, #:lower8_15:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
              fputs ("\tlsls r3, #8\n", file);
              fputs ("\tadds\tr3, #:lower0_7:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
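              /* For illustration only, for a hypothetical target `bar'
                 the sequence printed above builds the address in r3 one
                 byte at a time:

                        movs    r3, #:upper8_15:#bar
                        lsls    r3, #8
                        adds    r3, #:upper0_7:#bar
                        lsls    r3, #8
                        adds    r3, #:lower8_15:#bar
                        lsls    r3, #8
                        adds    r3, #:lower0_7:#bar

                 instead of loading it from a literal pool entry, which
                 -mpure-code forbids.  */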
            }
          else
            fputs ("\tldr\tr3, ", file);
        }
      else
        fputs ("\tldr\tr12, ", file);

      if (!target_pure_code)
        {
          assemble_name (file, label);
          fputc ('\n', file);
        }

      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
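          /* Working the arithmetic through on made-up addresses: if
             target == 0x8000 and .LTHUNKPCn == 0x2000, the literal word
             is (0x8000 - 7) - 0x2000 = 0x5ff9, and the pc-relative add
             yields 0x5ff9 + (0x2000 + 8) = 0x8001, the target address
             with the Thumb bit set.  */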
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          /* With -mpure-code, we cannot load MI_DELTA from the
             constant pool: we build it explicitly.  */
          if (target_pure_code)
            {
              thumb1_const_print r3 (file, 3);
              thumb1_gen_const_int_1 (r3, mi_delta);
            }
          else
            {
              fputs ("\tldr\tr3, ", file);
              assemble_name (file, label);
              fputs ("+4\n", file);
            }
          asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          /* Thumb1 unified syntax requires s suffix in instruction name when
             one of the operands is immediate.  */
          asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");

      /* With -mpure-code, we don't need to emit literals for the
         function address and delta since we emitted code to build
         them.  */
      if (!target_pure_code)
        {
          ASM_OUTPUT_ALIGN (file, 2);
          assemble_name (file, label);
          fputs (":\n", file);
          if (flag_pic)
            {
              /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
              rtx tem = XEXP (DECL_RTL (function), 0);
              /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
                 pipeline offset is four rather than eight.  Adjust the offset
                 accordingly.  */
              tem = plus_constant (GET_MODE (tem), tem,
                                   TARGET_THUMB1_ONLY ? -3 : -7);
              tem = gen_rtx_MINUS (GET_MODE (tem),
                                   tem,
                                   gen_rtx_SYMBOL_REF (Pmode,
                                                       ggc_strdup (labelpc)));
              assemble_integer (tem, 4, BITS_PER_WORD, 1);
            }
          else
            /* Output ".word .LTHUNKn".  */
            assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

          if (TARGET_THUMB1_ONLY && mi_delta > 255)
            assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
        }
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                       HOST_WIDE_INT vcall_offset, tree function)
{
  const bool long_call_p = arm_is_long_call_p (function);

  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
                                       function)
                    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
                        delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
                          false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  if (long_call_p)
    {
      emit_move_insn (temp, funexp);
      funexp = temp;
    }
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Indirect calls require a bit of fixup in PIC mode.  */
  if (long_call_p)
    {
      split_all_insns_noflow ();
      arm_reorg ();
    }

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
29250 /* Output code to add DELTA to the first argument, and then jump
29251 to FUNCTION. Used for C++ multiple inheritance. */
29254 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
29255 HOST_WIDE_INT vcall_offset
, tree function
)
29257 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
29259 assemble_start_function (thunk
, fnname
);
29261 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29263 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29264 assemble_end_function (thunk
, fnname
);
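/* As an illustration (not part of the compiler), a hedged sketch of the kind
   of C++ hierarchy that makes the middle end request such a thunk; the names
   below are hypothetical:

     struct A { virtual void f (); int a; };
     struct B { virtual void g (); int b; };
     struct C : A, B { void g () override; };

   A call through a B* whose dynamic type is C must adjust `this' by the
   offset of the B subobject before entering C::g, so the compiler emits a
   thunk that performs `this += delta' (plus, for virtual bases, the
   vcall-offset load handled above) and then tail-calls the real C::g.  */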
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:         gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return true;
}
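/* For example (illustrative only): a V4HImode CONST_VECTOR with elements
   {1, 2, 3, 4} is printed highest-numbered element first, so the "%04x"
   pattern above produces "0x0004000300020001".  */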
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
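/* Illustrative layout (not taken from the sources): for the HFmode constant
   1.0, whose IEEE half bit pattern is 0x3c00, the pool entry occupies a full
   word:

     little-endian:  3c00 0000   (value first, then two bytes of padding)
     big-endian:     0000 3c00   (two bytes of padding, then the value)

   so an ldr of the word leaves the half-float in the addressed 16 bits.  */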
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    const function_arg_info &arg,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
	  && (nregs & 1))
	{
	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", arg.type);
	  else if (res > 0)
	    {
	      nregs++;
	      if (res > 1 && warn_psabi)
		inform (input_location,
			"parameter passing for argument of type "
			"%qT changed in GCC 9.1", arg.type);
	    }
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}
static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
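/* For instance (illustrative only): a `short' argument (HImode, two bytes)
   is widened to SImode here, so it travels in a full 32-bit register, while
   SImode, DImode and floating-point modes pass through unchanged.  */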
static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
      || mode == V8BFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  if (TARGET_HAVE_MVE
      && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
    return true;

  if (TARGET_HAVE_MVE_FLOAT
      && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
      && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_HFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  if (TARGET_HAVE_MVE)
    switch (mode)
      {
      case E_QImode:
	return V16QImode;
      case E_HImode:
	return V8HImode;
      case E_SImode:
	return V4SImode;

      default:;
      }

  if (TARGET_HAVE_MVE_FLOAT)
    switch (mode)
      {
      case E_HFmode:
	return V8HFmode;
      case E_SFmode:
	return V4SFmode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return default_class_likely_spilled_p (rclass);
}

/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
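/* Concretely (illustrative only): `x << (n & 0xff)' can be emitted as a
   single SImode shift by n, because the hardware already reduces the count
   modulo 256; a DImode shift gets mask 0, so the middle end must not assume
   any particular truncation for out-of-range counts.  */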
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_debugger_regno (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  if (IS_PAC_REGNUM (regno))
    return DWARF_PAC_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
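/* A few sample mappings implied by the code above (illustrative only):
   s0 maps to DWARF 64 and s2 to 66 under the legacy single-precision
   numbering, while d16 (regno FIRST_VFP_REGNUM + 32) maps to 256 + 16 = 272;
   iWMMXt wCGR0 maps to 104 and wR0 to 112.  Anything unhandled deliberately
   yields DWARF_FRAME_REGISTERS, i.e. "no DWARF number".  */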
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, and for -Os it
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg = 0;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16 || IS_PAC_REGNUM (reg))
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else if (IS_PAC_REGNUM (reg))
	asm_fprintf (asm_out_file, "ra_auth_code");
      else
	asm_fprintf (out_file, "%r", reg);

      /* Check that the addresses are consecutive.  */
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	gcc_assert (REG_P (XEXP (e, 0))
		    && REGNO (XEXP (e, 0)) == SP_REGNUM
		    && CONST_INT_P (XEXP (e, 1))
		    && offset == INTVAL (XEXP (e, 1)));
      else
	gcc_assert (i == 1
		    && REG_P (e)
		    && REGNO (e) == SP_REGNUM);
      offset += reg_size;
    }

  fprintf (out_file, "}\n");
  if (padfirst)
    fprintf (out_file, "\t.pad #%d\n", padfirst);
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
	{
	  if (cfun->machine->pacspval_needed)
	    asm_fprintf (out_file, "\t.pacspval\n");
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx
			|| IS_PAC_REGNUM (REGNO (src)));
	    reg = REGNO (dest);

	    if (IS_PAC_REGNUM (REGNO (src)))
	      arm_unwind_emit_set (out_file, PATTERN (insn));
	    else
	      asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			   reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_GD32_FDPIC:
      fputs ("(tlsgd_fdpic)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDM32_FDPIC:
      fputs ("(tlsldm_fdpic)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_IE32_FDPIC:
      fputs ("(gottpoff_fdpic)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  switch (GET_MODE (operands[0]))
    {
    case E_V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case E_V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case E_V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (templ, operands);
  return "";
}
/* Output an arm casesi dispatch sequence.  Used by arm_casesi_internal insn.
   Responsible for the handling of switch statements in arm.  */
const char *
arm_output_casesi (rtx *operands)
{
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
	output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
      else
	output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
      break;
    case E_HImode:
      if (REGNO (operands[4]) != REGNO (operands[5]))
	{
	  output_asm_insn ("add\t%4, %0, %0", operands);
	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
	    output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
	  else
	    output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
	}
      else
	{
	  output_asm_insn ("add\t%4, %5, %0", operands);
	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
	    output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
	  else
	    output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
	}
      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
      break;
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%|pc, %|pc, %4", operands);
	}
      else
	output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
      break;
    default:
      gcc_unreachable ();
    }
  assemble_label (asm_out_file, label);
  output_asm_insn ("nop", operands);
  return "";
}
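/* A hedged sketch of the QImode dispatch this emits (illustrative only;
   register choice is arbitrary, with %0 = index, %1 = bound, %4 = scratch
   and %5 = table base):

	cmp	r0, #N
	bhi	.Ldefault
	ldrb	r3, [r12, r0]		@ unsigned byte offset from table
	add	pc, pc, r3, lsl #2	@ dispatch relative to the table
   .LrtxM:
	nop

   The offsets stored in the ADDR_DIFF_VEC are scaled by four via the lsl.  */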
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
static int
arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
  if (DEBUG_INSN_P (insn))
    return more;

  rtx_code code = GET_CODE (PATTERN (insn));
  if (code == USE || code == CLOBBER)
    return more;

  if (get_attr_type (insn) == TYPE_NO_INSN)
    return more;

  return more - 1;
}

/* Return how many instructions should scheduler lookahead to choose the
   best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
static const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision floating point types.  */
  if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
    {
      if (TYPE_MAIN_VARIANT (type) == float16_type_node)
	return NULL;
      if (TYPE_MODE (type) == BFmode)
	return "u6__bf16";
      else
	return "Dh";
    }

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
   All modes except THUMB1 have conditional execution.
   If we have conditional arithmetic, return false before reload to
   enable some ifcvt transformations.  */
static bool
arm_have_conditional_execution (void)
{
  bool has_cond_exec, enable_ifcvt_trans;

  /* Only THUMB1 cannot support conditional execution.  */
  has_cond_exec = !TARGET_THUMB1;

  /* Enable ifcvt transformations if we have conditional arithmetic, but only
     before reload.  */
  enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;

  return has_cond_exec && !enable_ifcvt_trans;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_modes (vector_modes *modes, bool)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V8QImode);
    }
  return 0;
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_VFP_BASE)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
      if (TARGET_HAVE_MVE)
	fixed_regs[VPR_REGNUM] = 0;
    }

  if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }

  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  /* The Q and GE bits are only accessed via special ACLE patterns.  */
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
     list is 8-bit.  Normally this means all registers in the list must be
     LO_REGS, that is (R0 -R7).  If any HI_REGS used, then we must use 32-bit
     encodings.  There is one exception for PUSH that LR in HI_REGS can be used
     with 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
   rtx, RETURN_PC is true if OPERANDS contains return insn.  WRITE_BACK_P is
   true if OPERANDS contains an insn which explicitly updates the base
   register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is SP
     and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}

/* Same as above, but operands are a register/memory pair in SImode.
   Assumes operands has the base register in position 0 and memory in position
   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  int count;
  rtx ops[2];
  int regnum, memnum;
  if (load)
    regnum = 0, memnum = 1;
  else
    regnum = 1, memnum = 0;
  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
  ops[memnum] = adjust_address (operands[2], DImode, 0);
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}

/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
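/* Worked example (illustrative only): the constant 1/16.0 has exact inverse
   16.0 = 2^4, so vfp3_const_double_for_fract_bits returns 4 and a multiply
   by 1/16.0 can become a vcvt with #4 fraction bits; for the float-to-fixed
   direction, 16.0 itself makes vfp3_const_double_for_bits return 4.  */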
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    cmp_mode = E_SImode;
  else
    cmp_mode = CC_Zmode;

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
					    oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (i.e. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s_rtx = operands[6];
  mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	{
	  rtx src = rval;
	  if (!satisfies_constraint_L (oldval))
	    {
	      gcc_assert (satisfies_constraint_J (oldval));

	      /* For such immediates, ADDS needs the source and destination regs
		 to be the same.

		 Normally this would be handled by RA, but this is all happening
		 after RA.  */
	      emit_move_insn (neg_bval, rval);
	      src = neg_bval;
	    }

	  emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
						       label2, cond));
	}
      else
	{
	  emit_move_insn (neg_bval, const1_rtx);
	  emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
	}
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
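/* For reference, a hedged sketch of the 32-bit strong-CAS loop the split
   above produces (mnemonic form only; register choice illustrative):

   .Lretry:
	ldrex	r0, [r2]		@ rval = *mem, monitor -> Exclusive
	cmp	r0, r3			@ rval == oldval ?
	bne	.Ldone			@ fail: Z clear
	strex	r1, r4, [r2]		@ try *mem = newval
	cmp	r1, #0
	bne	.Lretry			@ lost the reservation: try again
   .Ldone:

   with ldaex/stlex replacing ldrex/strex when the memory model requires
   acquire/release semantics.  */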
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a
   nand operation).  The operation is performed on the content at MEM and on
   VALUE following the memory model MODEL_RTX.  The content at MEM before and
   after the operation is returned in OLD_OUT and NEW_OUT respectively, while
   the success of the operation is returned in COND.  Using a scratch register
   or an operand register for these determines what result is returned for
   that pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
                 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
                 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value, so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = gen_int_mode (-INTVAL (value), wmode);
          code = PLUS;
        }
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
        {
          /* DImode plus/minus need to clobber flags.  */
          /* The adddi3 and subdi3 patterns are incorrectly written so that
             they require matching operands, even when we could easily support
             three operands.  Thankfully, this can be fixed up post-splitting,
             as the individual add+adc patterns do accept three operands and
             post-reload cprop can make these moves go away.  */
          emit_move_insn (new_out, old_out);
          if (code == PLUS)
            x = gen_adddi3 (new_out, new_out, value);
          else
            x = gen_subdi3 (new_out, new_out, value);
          emit_insn (x);
          break;
        }
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
                            use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
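/* For illustration only (the actual output depends on the operation and
   the memory model): an SImode atomic add without acquire/release
   semantics splits into a loop of roughly this shape:

       barrier (if required by MODEL)
       loop:  ldrex   r0, [r3]        @ old_out
              add     r1, r0, r2      @ new_out = old_out + value
              strex   ip, r1, [r3]    @ cond = store-exclusive status
              cmp     ip, #0
              bne     loop
       barrier (if required by MODEL)  */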
/* Return the mode for the MVE vector of predicates corresponding to MODE.  */
opt_machine_mode
arm_mode_to_pred_mode (machine_mode mode)
{
  switch (GET_MODE_NUNITS (mode))
    {
    case 16: return V16BImode;
    case 8: return V8BImode;
    case 4: return V4BImode;
    case 2: return V2QImode;
    }
  return opt_machine_mode ();
}
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
   If CAN_INVERT, store either the result or its inverse in TARGET
   and return true if TARGET contains the inverse.  If !CAN_INVERT,
   always store the result in TARGET, never its inverse.

   Note that the handling of floating-point comparisons is not
   IEEE compliant.  */

bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
                           bool can_invert)
{
  machine_mode cmp_result_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (op0);

  bool inverted;

  /* MVE supports more comparisons than Neon.  */
  if (TARGET_HAVE_MVE)
    inverted = false;
  else
    switch (code)
      {
      /* For these we need to compute the inverse of the requested
         comparison.  */
      case UNORDERED:
      case UNLT:
      case UNLE:
      case UNGT:
      case UNGE:
      case UNEQ:
      case NE:
        code = reverse_condition_maybe_unordered (code);
        if (!can_invert)
          {
            /* Recursively emit the inverted comparison into a temporary
               and then store its inverse in TARGET.  This avoids reusing
               TARGET (which for integer NE could be one of the inputs).  */
            rtx tmp = gen_reg_rtx (cmp_result_mode);
            if (arm_expand_vector_compare (tmp, code, op0, op1, true))
              gcc_unreachable ();
            emit_insn (gen_rtx_SET (target,
                                    gen_rtx_NOT (cmp_result_mode, tmp)));
            return false;
          }
        inverted = true;
        break;

      default:
        inverted = false;
        break;
      }

  switch (code)
    {
    /* These are natively supported by Neon for zero comparisons, but
       otherwise require the operands to be swapped.  For MVE, we can only
       compare registers.  */
    case LE:
    case LT:
      if (!TARGET_HAVE_MVE)
        if (op1 != CONST0_RTX (cmp_mode))
          {
            code = swap_condition (code);
            std::swap (op0, op1);
          }
      /* Fall through.  */

    /* These are natively supported by Neon for both register and zero
       operands.  MVE supports registers only.  */
    case EQ:
    case GE:
    case GT:
    case NE:
      if (TARGET_HAVE_MVE)
        {
          switch (GET_MODE_CLASS (cmp_mode))
            {
            case MODE_VECTOR_INT:
              emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
                                        op0, force_reg (cmp_mode, op1)));
              break;
            case MODE_VECTOR_FLOAT:
              if (TARGET_HAVE_MVE_FLOAT)
                emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
                                            op0, force_reg (cmp_mode, op1)));
              else
                gcc_unreachable ();
              break;
            default:
              gcc_unreachable ();
            }
        }
      else
        emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
      return inverted;

    /* These are natively supported for register operands only.
       Comparisons with zero aren't useful and should be folded
       or canonicalized by target-independent code.  */
    case GEU:
    case GTU:
      if (TARGET_HAVE_MVE)
        emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
                                  op0, force_reg (cmp_mode, op1)));
      else
        emit_insn (gen_neon_vc (code, cmp_mode, target,
                                op0, force_reg (cmp_mode, op1)));
      return inverted;

    /* These require the operands to be swapped and likewise do not
       support comparisons with zero.  */
    case LEU:
    case LTU:
      if (TARGET_HAVE_MVE)
        emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
                                  force_reg (cmp_mode, op1), op0));
      else
        emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
                                target, force_reg (cmp_mode, op1), op0));
      return inverted;

    /* These need a combination of two comparisons.  */
    case LTGT:
    case ORDERED:
      {
        /* Operands are LTGT iff (a > b || a < b).
           Operands are ORDERED iff (a > b || a <= b).  */
        rtx gt_res = gen_reg_rtx (cmp_result_mode);
        rtx alt_res = gen_reg_rtx (cmp_result_mode);
        rtx_code alt_code = (code == LTGT ? LT : LE);
        if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
            || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
          gcc_unreachable ();
        emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
                                                     gt_res, alt_res)));
        return inverted;
      }

    default:
      gcc_unreachable ();
    }
}
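/* A worked example of the inversion path above (names as in this
   function): a Neon UNLE comparison is not natively supported, so
   reverse_condition_maybe_unordered turns UNLE into GT.  With
   CAN_INVERT the caller receives the GT result plus an "inverted"
   indication; without it, the GT result lands in a temporary and a
   vector NOT of that temporary is stored in TARGET.  */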
/* Expand a vcond or vcondu pattern with operands OPERANDS.
   CMP_RESULT_MODE is the mode of the comparison result.  */

void
arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
  /* When expanding for MVE, we do not want to emit a (useless) vpsel in
     arm_expand_vector_compare, and another one here.  */
  rtx mask;

  if (TARGET_HAVE_MVE)
    mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
  else
    mask = gen_reg_rtx (cmp_result_mode);

  bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
                                             operands[4], operands[5], true);
  if (inverted)
    std::swap (operands[1], operands[2]);
  if (TARGET_NEON)
    emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
                              mask, operands[1], operands[2]));
  else
    {
      machine_mode cmp_mode = GET_MODE (operands[0]);

      switch (GET_MODE_CLASS (cmp_mode))
        {
        case MODE_VECTOR_INT:
          emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
                                operands[1], operands[2], mask));
          break;
        case MODE_VECTOR_FLOAT:
          if (TARGET_HAVE_MVE_FLOAT)
            emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
                                    operands[1], operands[2], mask));
          else
            gcc_unreachable ();
          break;
        default:
          gcc_unreachable ();
        }
    }
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
        emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
          pair = gen_lowpart (TImode, pair);
          emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
          emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
        }
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  mask = gen_const_vec_duplicate (vmode, mask);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
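/* Example of the modulo handling above: for a single V8QImode input,
   VEC_PERM_EXPR semantics require selector values to wrap, so an index
   of 9 must select lane 9 & 7 == 1.  VTBL instead yields zero for
   out-of-range indices, hence the explicit AND with the replicated
   mask before arm_expand_vec_perm_1 is called.  */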
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
        lane = lane ^ (nelems / 2);
    }
  return lane;
}
/* Some permutations index into pairs of vectors; this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
              && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
        (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
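/* Example selector accepted here (little-endian, V8QImode, two
   operands, odd == 0): { 0, 2, 4, 6, 8, 10, 12, 14 }, i.e. the even
   lanes of both inputs, which is exactly what VUZP's first output
   register holds.  */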
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
        neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
          != elt)
        return false;
      elt =
        neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
          != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
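/* Example selector accepted here (little-endian, V8QImode, two
   operands, high == 0): { 0, 8, 1, 9, 2, 10, 3, 11 }, interleaving
   the low halves of the two inputs as VZIP's first output.  */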
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen) (machine_mode, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case E_V16QImode:
        case E_V8QImode:
          gen = gen_neon_vrev64;
          break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case E_V16QImode:
        case E_V8QImode:
          gen = gen_neon_vrev32;
          break;
        case E_V8HImode:
        case E_V4HImode:
        case E_V8HFmode:
        case E_V4HFmode:
          gen = gen_neon_vrev64;
          break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case E_V16QImode:
        case E_V8QImode:
          gen = gen_neon_vrev16;
          break;
        case E_V8HImode:
        case E_V4HImode:
          gen = gen_neon_vrev32;
          break;
        case E_V4SImode:
        case E_V2SImode:
        case E_V4SFmode:
        case E_V2SFmode:
          gen = gen_neon_vrev64;
          break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->vmode, d->target, d->op0));
  return true;
}
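/* Example: for V8QImode the selector { 1, 0, 3, 2, 5, 4, 7, 6 } gives
   diff == 1 and satisfies the check above, so it is emitted as a
   single VREV16 reversing the bytes within each half-word.  */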
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
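/* Example selector accepted here (little-endian, V8QImode, two
   operands, odd == 0): { 0, 8, 2, 10, 4, 12, 6, 14 }, the classic
   transpose-step pattern VTRN implements.  */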
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        return false;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);

  if (d->vmode == E_DImode)
    return false;

  emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
  return true;
}
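/* Example: { 1, 2, 3, 4, 5, 6, 7, 8 } on V8QImode passes the
   consecutive-index check with location == 1 and becomes a single
   VEXT with a one-byte offset; { 1, ..., 7, 0 } on a single vector is
   the rotation case handled by letting "next" wrap back to 0.  */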
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (TARGET_NEON)
    {
      /* Check if the input mask matches vext before reordering the
         operands.  */
      if (arm_evpc_neon_vext (d))
        return true;

      /* The pattern matching functions above are written to look for a small
         number to begin the sequence (0, 1, N/2).  If we begin with an index
         from the second operand, we can swap the operands.  */
      unsigned int nelt = d->perm.length ();
      if (d->perm[0] >= nelt)
        {
          d->perm.rotate_inputs (1);
          std::swap (d->op0, d->op1);
        }

      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
                              rtx target, rtx op0, rtx op1,
                              const vec_perm_indices &sel)
{
  if (vmode != op_mode)
    return false;

  struct expand_vec_perm_d d;
  int i, nelt, which;

  if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
    return false;

  d.target = target;
  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
        op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = !target;

  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */

    case 2:
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);

  if (!d.testing_p)
    return arm_expand_vec_perm_const_1 (&d);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  bool ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
         instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
                               rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
        in = the register pair containing the input value.
        out = the destination register pair.
        up = the high- or low-part of each pair.
        down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
              && (REG_P (out) || SUBREG_P (out))
              && GET_MODE (out) == DImode);
  gcc_assert (in
              && (REG_P (in) || SUBREG_P (in))
              && GET_MODE (in) == DImode);
  gcc_assert (amount
              && (((REG_P (amount) || SUBREG_P (amount))
                   && GET_MODE (amount) == SImode)
                  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
              || (GET_CODE (scratch1) == SCRATCH)
              || (GET_MODE (scratch1) == SImode
                  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
              || (GET_CODE (scratch2) == SCRATCH)
              || (GET_MODE (scratch2) == SImode
                  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
              || !HARD_REGISTER_P (out)
              || (REGNO (out) != REGNO (amount)
                  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
            gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
            gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
            gen_addsi3_compare0 ((DEST), (SRC), \
                                 GEN_INT (-32))
  #define SET(DEST,SRC) \
            gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
                            SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
                            SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
            gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
            gen_arm_cond_branch ((LABEL), \
                                 gen_rtx_ ## COND (CCmode, cc_reg, \
                                                   const0_rtx), \
                                 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result that an ARM instruction in
         a shift-by-register would give.  This helps reduce execution
         differences between optimization levels, but it won't stop other
         parts of the compiler doing different things.  This is "undefined
         behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
        emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
        {
          if (code == ASHIFTRT)
            {
              rtx const31_rtx = GEN_INT (31);
              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
            }
          else
            emit_insn (gen_movdi (out, const0_rtx));
        }

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
        {
          /* Shifts by a constant less than 32.  */
          rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

          /* Clearing the out register in DImode first avoids lots
             of spilling and results in less stack usage.
             Later this redundant insn is completely removed.
             Do that only if "in" and "out" are different registers.  */
          if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
            emit_insn (SET (out, const0_rtx));
          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
          emit_insn (SET (out_down,
                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
                               out_down)));
          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
        }
      else
        {
          /* Shifts by a constant greater than 31.  */
          rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

          if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
            emit_insn (SET (out, const0_rtx));
          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
          if (code == ASHIFTRT)
            emit_insn (gen_ashrsi3 (out_up, in_up,
                                    GEN_INT (31)));
          else
            emit_insn (SET (out_up, const0_rtx));
        }
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
         Swapping them around now allows the later code to be more general.  */
      switch (code)
        {
        case ASHIFT:
          emit_insn (SUB_32 (scratch1, amount));
          emit_insn (RSB_32 (scratch2, amount));
          break;
        case ASHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          /* Also set CC = amount > 32.  */
          emit_insn (SUB_S_32 (scratch2, amount));
          break;
        case LSHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          emit_insn (SUB_32 (scratch2, amount));
          break;
        default:
          gcc_unreachable ();
        }

      /* Emit code like this:

         arithmetic-left:
            out_down = in_down << amount;
            out_down = (in_up << (amount - 32)) | out_down;
            out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
            out_up = in_up << amount;

         arithmetic-right:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount < 32)
              out_down = ((signed)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         logical-right:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount < 32)
              out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

          The ARM and Thumb2 variants are the same but implemented slightly
          differently.  If this were only called during expand we could just
          use the Thumb2 case and let combine do the right thing, but this
          can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.

   In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
   unsigned range of 0 <= A < 256 as described in the AAELF32
   relocation handling documentation: REL-type relocations are encoded
   as unsigned in this case.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (target_word_relocations)
    return false;

  if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
        {
          if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
            return IN_RANGE (INTVAL (xop1), 0, 0xff);
          else
            return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
        }
    }

  return false;
}
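/* Examples against the rules above: (const (plus (symbol_ref "x")
   (const_int 16))) is acceptable everywhere; an addend of 40000 fails
   the signed 16-bit REL range check; and on Thumb-1 without MOVT
   anything outside [0, 255] is rejected.  */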
/* Return true if *COMPARISON is a valid comparison operation, and force
   its operands into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      /* gen_compare_reg() will sort out any invalid operands.  */
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
        break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return TARGET_HARD_FLOAT;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
                                 unsigned HOST_WIDE_INT length,
                                 unsigned HOST_WIDE_INT align,
                                 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
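/* Worked cost example (illustrative; the constant-cost term depends on
   the value being stored): LENGTH == 15 with word alignment and no
   strd gives (15 >> 2) == 3 word stores plus leftover[3] == 2 trailing
   stores; when unaligned access is allowed, the final STRH/STRB pair
   is merged into one STR, so NUM is decremented before the comparison
   with arm_block_set_max_insns ().  */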
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
                             unsigned HOST_WIDE_INT align,
                             machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
                              unsigned HOST_WIDE_INT length,
                              unsigned HOST_WIDE_INT value,
                              unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
          offset += nelt_mode;
          mem = adjust_automodify_address (dstbase, mode, dst, offset);
        }
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
        reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
                                              + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
                            unsigned HOST_WIDE_INT length,
                            unsigned HOST_WIDE_INT value,
                            unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
          offset += length - nelt_mode;
          mem = adjust_automodify_address (dstbase, mode, dst, offset);
          /* We are shifting bytes back, set the alignment accordingly.  */
          if ((length & 0x3) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 4);
          else if ((length & 0x1) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 2);
          else
            set_mem_align (mem, BITS_PER_UNIT);

          emit_insn (gen_movmisalignv16qi (mem, reg));
          return true;
        }
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storev8qi (mem, reg));
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
        set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_insn (gen_unaligned_storev8qi (mem, reg));
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT * 2);
      else
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
        return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
        return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
        {
          addr = plus_constant (Pmode, dst, i);
          mem = adjust_automodify_address (dstbase, DImode, addr, i);
          if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
            emit_move_insn (mem, reg);
          else
            emit_insn (gen_unaligned_storedi (mem, reg));
        }
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
         compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
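/* Usage sketch of the dispatch above (tuning-dependent, so only
   indicative): an 8-byte-or-larger memset on a Neon CPU whose tuning
   sets string_ops_prefer_neon goes through arm_block_set_vect first;
   a misaligned destination without unaligned_access falls back to the
   byte/half-word loop; everything else takes the word/strd path.  */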
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
                                       (const_int 16)
                                       (const_int 16))
                    (const_int imm16_1))
     or
     prev (movw) == (set (reg r1)
                         (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
                         (lo_sum (reg r1)
                                 (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
           && REG_P (SET_DEST (curr_set))
           && REG_P (SET_DEST (prev_set))
           && GET_CODE (SET_SRC (prev_set)) == HIGH
           && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Return TRUE if return address signing mechanism is enabled.  */
bool
arm_current_function_pac_enabled_p (void)
{
  return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
          || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
              && !crtl->is_leaf));
}
/* Raise an error if the current target arch is not bti compatible.  */
void aarch_bti_arch_check (void)
{
  if (!arm_arch8m_main)
    error ("This architecture does not support branch protection instructions");
}
/* Return TRUE if Branch Target Identification Mechanism is enabled.  */
bool
aarch_bti_enabled (void)
{
  return aarch_enable_bti != 0;
}
/* Check if INSN is a BTI J insn.  */
bool
aarch_bti_j_insn_p (rtx_insn *insn)
{
  if (!insn || !INSN_P (insn))
    return false;

  rtx pat = PATTERN (insn);
  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
}
/* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction.  */
bool
aarch_pac_insn_p (rtx x)
{
  if (!x || !INSN_P (x))
    return false;

  rtx pat = PATTERN (x);

  if (GET_CODE (pat) == SET)
    {
      rtx tmp = XEXP (pat, 1);
      if (tmp
          && ((GET_CODE (tmp) == UNSPEC
               && XINT (tmp, 1) == UNSPEC_PAC_NOP)
              || (GET_CODE (tmp) == UNSPEC_VOLATILE
                  && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
        return true;
    }

  return false;
}
/* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
   For Arm, both of these map to a simple BTI instruction.  */

rtx
aarch_gen_bti_c (void)
{
  return gen_bti_nop ();
}

rtx
aarch_gen_bti_j (void)
{
  return gen_bti_nop ();
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
        return false;
      default:
        return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((VAR_P (decl_op1)
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (VAR_P (decl_op0)
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
                   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
                   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but the previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, &global_options_set,
                            TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);

  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
              ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
           TARGET_THUMB2_P (flags) ? "thumb2" :
           TARGET_THUMB_P (flags) ? "thumb1" :
           "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
             ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
             ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
             ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
                          : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
                          : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, false);
  arm_configure_build_target (&callee_target, callee_opts, false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     A function with mode-specific instructions, e.g. using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
        if (TREE_VALUE (args)
            && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
          ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      argstr = NULL;
      if (!strcmp (q, "thumb"))
        {
          opts->x_target_flags |= MASK_THUMB;
          if (TARGET_FDPIC && !arm_arch_thumb2)
            sorry ("FDPIC mode is not supported in Thumb-1 mode");
        }

      else if (!strcmp (q, "arm"))
        opts->x_target_flags &= ~MASK_THUMB;

      else if (!strcmp (q, "general-regs-only"))
        opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;

      else if (startswith (q, "fpu="))
        {
          int fpu_index;
          if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
                                       &fpu_index, CL_TARGET))
            {
              error ("invalid fpu for target attribute or pragma %qs", q);
              return false;
            }
          if (fpu_index == TARGET_FPU_auto)
            {
              /* This doesn't really make sense until we support
                 general dynamic selection of the architecture and all
                 sub-features.  */
              sorry ("auto fpu selection not currently permitted here");
              return false;
            }
          opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
        }
      else if (startswith (q, "arch="))
        {
          char *arch = q + 5;
          const arch_option *arm_selected_arch
             = arm_parse_arch_option_name (all_architectures, "arch", arch);

          if (!arm_selected_arch)
            {
              error ("invalid architecture for target attribute or pragma %qs",
                     q);
              return false;
            }

          opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
        }
      else if (q[0] == '+')
        {
          opts->x_arm_arch_string
            = xasprintf ("%s%s", opts->x_arm_arch_string, q);
        }
      else
        {
          error ("unknown target attribute or pragma %qs", q);
          return false;
        }
    }

  return true;
}

/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
                                 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts, opts_set);
  arm_configure_build_target (&arm_active_target, &cl_opts, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts, opts_set);
}

static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
                                        build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
                           build_tree_list (NULL_TREE, value),
                           *attributes);
}

/* For testing.  Insert thumb or arm modes alternately on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}

/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
                              tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options, func_options_set;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);
  memset (&func_options_set, 0, sizeof (func_options_set));

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options, &func_options_set,
                           TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options, &func_options_set,
                            TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
                                              &func_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options, &func_options_set);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  return ret;
}

/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */

static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits_internal);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
        return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}

/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */

void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  arm_print_asm_arch_directives (stream, targ_options);

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
          || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
              && cfun->is_thunk))
        fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
        fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
        fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}

/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

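/* Illustrative RTL shapes handled above (sketch):

     (mem (reg r1))                        BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))   BASE = r1, OFFSET = 8

   Any other address form clears BASE and OFFSET and the caller sees
   false.  */
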
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (REG_P (src) && MEM_P (dest))
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (MEM_P (src) && REG_P (dest))
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

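/* Worked example (hypothetical numbers): with max_pri == 32767, a load
   from [r1, #4] gets *fusion_pri == 32765 and a store to the same
   address gets 32764; within each group a smaller base register number
   or smaller non-negative offset yields a larger *pri, so fusible
   accesses end up adjacent in the ready list.  */
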
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian              Little-Endian

   GCC          0   1   2   3            3   2   1   0
              | x | x | x | x |        | x | x | x | x |
   Architecture 3   2   1   0            3   2   1   0

   Low Mask:      { 2, 3 }                 { 0, 1 }
   High Mask:     { 0, 1 }                 { 2, 3 }  */

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}

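/* For example, for V4SImode with HIGH == true the function returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   High Mask row of the diagram above.  */
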
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}

/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */

static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;

  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}

/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;
      if (flags & SECTION_WRITE)
        *num |= 0x1;
      if (flags & SECTION_CODE)
        *num |= 0x4;
      if (flags & SECTION_MERGE)
        *num |= 0x10;
      if (flags & SECTION_STRINGS)
        *num |= 0x20;
      if (flags & SECTION_TLS)
        *num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;

      return true;
    }

  return false;
}

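/* The numeric values above correspond to the standard ELF SHF_*
   encodings: 0x1 SHF_WRITE, 0x2 SHF_ALLOC, 0x4 SHF_EXECINSTR,
   0x10 SHF_MERGE, 0x20 SHF_STRINGS, 0x200 SHF_GROUP, 0x400 SHF_TLS,
   0x80000000 SHF_EXCLUDE and 0x20000000 SHF_ARM_PURECODE, so an
   allocated, executable pure-code section is emitted with the value
   0x20000006.  */
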
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like for
         example .text.startup.  We set the pure-code attribute and return the
         same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}

/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer may
   contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}

/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode, op0, mode, op1, mode);

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}

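/* Illustrative expansion: for SImode the libfunc is __aeabi_idivmod
   (or __aeabi_uidivmod), which per the ARM run-time ABI returns the
   quotient in r0 and the remainder in r1; LIBVAL_MODE is then DImode
   and the two subregs above split that register pair back apart.  */
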
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP: case VUNSPEC_LDC: case VUNSPEC_LDCL:
      case VUNSPEC_STC: case VUNSPEC_STCL:
      case VUNSPEC_MCR: case VUNSPEC_MRC:
        if (arm_arch4)
          return true;
        break;
      case VUNSPEC_CDP2: case VUNSPEC_LDC2: case VUNSPEC_LDC2L:
      case VUNSPEC_STC2: case VUNSPEC_STC2L:
      case VUNSPEC_MCR2: case VUNSPEC_MRC2:
        /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch5t)
          return true;
        break;
      case VUNSPEC_MCRR: case VUNSPEC_MRRC:
        /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch6 || arm_arch5te)
          return true;
        break;
      case VUNSPEC_MCRR2: case VUNSPEC_MRRC2:
        if (arm_arch6)
          return true;
        break;
      default:
        gcc_unreachable ();
    }
  return false;
}

/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
      case PLUS:
        {
          /* Or registers with an offset.  */
          if (!REG_P (XEXP (op, 0)))
            return false;

          op = XEXP (op, 1);

          /* The offset must be an immediate though.  */
          if (!CONST_INT_P (op))
            return false;

          range = INTVAL (op);

          /* Within the range of [-1020,1020].  */
          if (!IN_RANGE (range, -1020, 1020))
            return false;

          /* And a multiple of 4.  */
          return (range % 4) == 0;
        }
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
        return REG_P (XEXP (op, 0));
      default:
        gcc_unreachable ();
    }
  return false;
}

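/* Illustrative operands (assembly sketch, not generated here):

     ldc  p1, cr0, [r2]            @ plain register: accepted
     ldc  p1, cr0, [r2, #-1020]    @ in range and a multiple of 4: accepted
     ldc  p1, cr0, [r2], #4        @ post-increment on a register: accepted

   An offset such as #1022 (not a multiple of 4) or #2048 (outside
   [-1020, 1020]) is rejected.  */
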
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
arm_invalid_conversion (const_tree fromtype, const_tree totype)
{
  if (element_mode (fromtype) != element_mode (totype))
    {
      /* Do not allow conversions to/from BFmode scalar types.  */
      if (TYPE_MODE (fromtype) == BFmode)
        return N_("invalid conversion from type %<bfloat16_t%>");
      if (TYPE_MODE (totype) == BFmode)
        return N_("invalid conversion to type %<bfloat16_t%>");
    }

  /* Conversion allowed.  */
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
arm_invalid_unary_op (int op, const_tree type)
{
  /* Reject all single-operand operations on BFmode except for &.  */
  if (element_mode (type) == BFmode && op != ADDR_EXPR)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                       const_tree type2)
{
  /* Reject all 2-operand operations on BFmode.  */
  if (element_mode (type1) == BFmode
      || element_mode (type2) == BFmode)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
                           reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
          || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}

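/* Illustrative effect (hypothetical values): with BITS_PER_WORD == 32,
   a string constant that would otherwise be byte-aligned is raised to
   at least 32-bit alignment, or 64-bit when tuning for XScale in ARM
   state (factor == 2), so word-sized copies from it start on an
   aligned boundary.  */
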
/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */

void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}

/* Have we recorded an explicit access to the Q bit of APSR?  */

bool
arm_q_bit_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle qbit",
                             DECL_ATTRIBUTES (cfun->decl));
  return true;
}

/* Have we recorded an explicit access to the GE bits of PSTATE?  */

bool
arm_ge_bits_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle gebits",
                             DECL_ATTRIBUTES (cfun->decl));
  return true;
}

/* NULL if insn INSN is valid within a low-overhead loop.
   Otherwise return why doloop cannot be applied.  */

static const char *
arm_invalid_within_doloop (const rtx_insn *insn)
{
  if (!TARGET_HAVE_LOB)
    return default_invalid_within_doloop (insn);

  if (CALL_P (insn))
    return "Function call in the loop.";

  if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
    return "LR is used inside loop.";

  return NULL;
}

bool
arm_target_insn_ok_for_lob (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  /* Make sure the basic block of the target insn is a simple latch
     having as its single predecessor and successor the body of the loop
     itself.  Only simple loops with a single basic block as body are
     supported for 'low-overhead loop' making sure that LE target is
     above LE itself in the generated code.  */

  return single_succ_p (bb)
    && single_pred_p (bb)
    && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
    && contains_no_active_insn_p (bb);
}

#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */

static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }
}

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */

static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
      bitmap_clear (isa_all_fpubits_internal);
      bitmap_copy (isa_all_fpubits_internal, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits_internal))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
               " group that are not defined by any FPU.\n"
               "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
    }
}

static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */

/* Implement TARGET_STACK_PROTECT_GUARD.  In case of a
   global variable based guard use the default else
   return a null tree.  */

static tree
arm_stack_protect_guard (void)
{
  if (arm_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}

/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
   Unlike the arm version, we do NOT implement asm flag outputs.  */

rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
                      vec<machine_mode> & /*input_modes*/,
                      vec<const char *> &constraints,
                      vec<rtx> &, vec<rtx> & /*clobbers*/,
                      HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    if (startswith (constraints[i], "=@cc"))
      {
        sorry ("%<asm%> flags not supported in thumb1 mode");
        break;
      }
  return NULL;
}

/* Generate code to enable conditional branches in functions over 1 MiB.
   Parameters are:
     operands: is the operands list of the asm insn (see arm_cond_branch or
       arm_cond_branch_reversed).
     pos_label: is an index into the operands array where operands[pos_label] is
       the asm label of the final jump destination.
     dest: is a string which is used to generate the asm label of the
       intermediate destination.
     branch_format: is a string denoting the intermediate branch format, e.g.
       "beq", "bne", etc.  */

const char *
arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
                    const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];

  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
                               CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = arm_strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);

  return "";
}

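/* Illustrative output sequence (label name and condition are
   hypothetical): for a far "beq", the caller passes the inverted
   condition as BRANCH_FORMAT, so the emitted code is roughly

        bne     .Lbcond4        @ short branch around the far jump
        b       <final destination>
     .Lbcond4:

   keeping the conditional branch itself within its limited range.  */
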
/* If the given mode matches, load from memory to LO_REGS
   (i.e. [Rn], Rn <= LO_REGS).  */

enum reg_class
arm_mode_base_reg_class (machine_mode mode)
{
  if (TARGET_HAVE_MVE
      && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
    return LO_REGS;

  return MODE_BASE_REG_REG_CLASS (mode);
}

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
arm_get_mask_mode (machine_mode mode)
{
  if (TARGET_HAVE_MVE)
    return arm_mode_to_pred_mode (mode);

  return default_get_mask_mode (mode);
}

/* Output assembly to read the thread pointer from the appropriate TPIDR
   register into DEST.  If PRED_P also emit the %? that can be used to
   output the predication code.  */

const char *
arm_output_load_tpidr (rtx dst, bool pred_p)
{
  char buf[64];
  int tpidr_coproc_num = -1;
  switch (target_thread_pointer)
    {
    case TP_TPIDRURW:
      tpidr_coproc_num = 2;
      break;
    case TP_TPIDRURO:
      tpidr_coproc_num = 3;
      break;
    case TP_TPIDRPRW:
      tpidr_coproc_num = 4;
      break;
    default:
      gcc_unreachable ();
    }
  snprintf (buf, sizeof (buf),
            "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
            pred_p ? "%?" : "", tpidr_coproc_num);
  output_asm_insn (buf, &dst);

  return "";
}

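/* For example (illustrative): with the thread pointer held in TPIDRURO
   the emitted instruction is

        mrc     p15, 0, r0, c13, c0, 3  @ load_tp_hard

   assuming DST is r0; with PRED_P the "mrc" gains a %? predication
   marker.  */
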
#include "gt-arm.h"