/* Output routines for GCC for ARM.
   Copyright (C) 1991-2023 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch-common.h"
#include "aarch-common-protos.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;
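
/* A minipool is a constant pool emitted inline in the instruction
   stream so that PC-relative loads stay within their limited offset
   range; an Mnode describes one pooled constant and an Mfix an
   instruction that needs to reach one.  */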

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static void arm_emit_multi_reg_pop (unsigned long);
static int vfp_emit_fstmd (int, int);
static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *, struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalign,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
					  rtx, const vec_perm_indices &);
static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
				       vec<machine_mode> &,
				       vec<const char *> &, vec<rtx> &,
				       vec<rtx> &, HARD_REG_SET &, location_t);
static const char *arm_identify_fpu_from_isa (sbitmap);
/* Table of machine attributes.  */
static const attribute_spec arm_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, false, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
};

static const scoped_attribute_specs arm_gnu_attribute_table =
{
  "gnu", { arm_gnu_attributes }
};

static const scoped_attribute_specs *const arm_attribute_table[] =
{
  &arm_gnu_attribute_table
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef  TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
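
/* That is, anchored offsets run from -4088 to +4095, covering
   4088 + 1 + 4095 = 8184 bytes per anchor, and 8184 = 8 * 1023.  */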

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
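
/* Under -mrestrict-it (the ARMv8 rule) an IT block may predicate only a
   single instruction; otherwise up to four instructions are allowed.  */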

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef  TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;

/* Nonzero if this chip supports the ARM Architecture 8-M Mainline
   extensions.  */
int arm_arch8m_main = 0;

/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   extensions.  */
int arm_arch8_1m_main = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* Nonzero if chip supports the Custom Datapath Extension.  */
int arm_arch_cde = 0;
int arm_arch_cde_coproc = 0;
const int arm_arch_cde_coproc_bits[] = {
  0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
};
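
/* Entry N of arm_arch_cde_coproc_bits is the bit for coprocessor N;
   CDE defines coprocessors 0 through 7.  */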

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
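
/* The codes above come in complementary pairs, so the inverse of
   condition N is simply N ^ 1 (eq/ne, cs/cc, mi/pl, vs/vc, ...).  */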

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] = {
  FP_SYSREGS
};
#undef DEF_FP_SYSREG

#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS					\
  (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM)		\
	    | (1 << SP_REGNUM)					\
	    | (1 << PC_REGNUM)					\
	    | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM	\
	       ? (1 << PIC_OFFSET_TABLE_REGNUM)			\
	       : 0)))
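
/* That is, the low registers r0-r7 minus the Thumb hard frame pointer
   and, when one is in use, the PIC offset table register.  */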

/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots, l1_size, l1_line_size) \
  { num_slots, l1_size, l1_line_size }

/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
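
/* Everything above is unit cost except a taken conditional branch,
   which is charged three times a plain statement.  */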

/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"

const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (3),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (4),	/* extend.  */
      COSTS_N_INSNS (4),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    COSTS_N_INSNS (2),	/* load_sign_extend.  */
    COSTS_N_INSNS (2),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* loadf.  */
    COSTS_N_INSNS (5),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (2),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};

const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    0,			/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    0,			/* log_shift_reg.  */
    0,			/* extend_arith.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (1),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* loadf.  */
    COSTS_N_INSNS (1),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (1),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36),	/* div.  */
      COSTS_N_INSNS (11),	/* mult.  */
      COSTS_N_INSNS (20),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64),	/* div.  */
      COSTS_N_INSNS (16),	/* mult.  */
      COSTS_N_INSNS (25),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (6),	/* widen.  */
      COSTS_N_INSNS (6),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};

const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (6),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};

const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (2),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};

const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (3),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    0,			/* load_unaligned.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};

const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (4),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    0,			/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (5),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};

const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,			/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      COSTS_N_INSNS (8)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    0,			/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    1,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    1,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (3),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),	/* div.  */
      COSTS_N_INSNS (2),	/* mult.  */
      COSTS_N_INSNS (5),	/* mult_addsub.  */
      COSTS_N_INSNS (3),	/* fma.  */
      COSTS_N_INSNS (1),	/* addsub.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),	/* alu.  */
    COSTS_N_INSNS (4),	/* mult.  */
    COSTS_N_INSNS (1),	/* movi.  */
    COSTS_N_INSNS (2),	/* dup.  */
    COSTS_N_INSNS (2)	/* extract.  */
  }
};
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,				/* Constant limit.  */
  3,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  3,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  3,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  3,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  32,				/* Memset max inline.  */
  4,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  1,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,				/* Constant limit.  */
  1,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  NULL,				/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  1,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,		/* Insn extra costs.  */
  &generic_addr_mode_costs,	/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  8,				/* Memset max inline.  */
  2,				/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (e.g. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Supported TLS relocations.  */
enum tls_reloc
{
  TLS_GD32,
  TLS_GD32_FDPIC,
  TLS_LDM32,
  TLS_LDM32_FDPIC,
  TLS_LDO32,
  TLS_IE32,
  TLS_IE32_FDPIC,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
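/* Illustrative note (added, not from the original source): under -Os the
   limit is pinned to one instruction, so a constant that cannot be built
   with a single mov/mvn-style instruction is expected to come from the
   literal pool rather than a multi-instruction synthesis sequence.  */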
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
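/* Illustrative note (added, not from the original source): the loop above
   uses the classic v &= v - 1 trick, which clears exactly one set bit per
   iteration, so the body runs once per set bit.  For example,
   bit_count (0x29) (binary 101001) iterates three times and returns 3.  */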
2489 /* Return the number of bits set in BMAP. */
2491 bitmap_popcount (const sbitmap bmap
)
2493 unsigned int count
= 0;
2495 sbitmap_iterator sbi
;
2497 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2506 } arm_fixed_mode_set
;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
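/* Example (added illustration, not from the original source): given the mode
   suffixes registered below, a hypothetical call

     arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3);

   registers the libcall name "__gnu_ssaddsa3" (three-operand saturating
   signed addition on SAmode).  */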
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
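/* Example (added illustration, not from the original source): the "2" suffix
   appears only when source and destination are both fixed-point, of the same
   signedness, and both (or neither) fract-like.  So a hypothetical HQmode to
   SQmode "fract" conversion registers "__gnu_fracthqsq2", whereas SQmode to
   SFmode (float destination) registers plain "__gnu_fractsqsf".  */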
static GTY(()) rtx speculation_barrier_libfunc;

/* Record that we have no arithmetic or comparison libfuncs for
   machine mode MODE.  */

static void
arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
{
  /* Arithmetic.  */
  set_optab_libfunc (add_optab, mode, NULL);
  set_optab_libfunc (sdiv_optab, mode, NULL);
  set_optab_libfunc (smul_optab, mode, NULL);
  set_optab_libfunc (neg_optab, mode, NULL);
  set_optab_libfunc (sub_optab, mode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, mode, NULL);
  set_optab_libfunc (ne_optab, mode, NULL);
  set_optab_libfunc (lt_optab, mode, NULL);
  set_optab_libfunc (le_optab, mode, NULL);
  set_optab_libfunc (ge_optab, mode, NULL);
  set_optab_libfunc (gt_optab, mode, NULL);
  set_optab_libfunc (unord_optab, mode, NULL);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  machine_mode mode_iter;

  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
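  /* Illustrative note (added, not from the original source): with the
     mappings above, plain C division and modulus such as

       long long q (long long a, long long b) { return a / b; }
       long long r (long long a, long long b) { return a % b; }

     both become calls to __aeabi_ldivmod, with the quotient read from
     {r0, r1} and the remainder from {r2, r3}.  */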
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      arm_block_arith_comp_libfuncs_for_mode (HFmode);
      break;

    default:
      break;
    }

  /* For all possible libcalls in BFmode, record NULL.  */
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
    {
      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
    }
  arm_block_arith_comp_libfuncs_for_mode (BFmode);
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" }
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" },
      { E_SFmode, "sf" },
      { E_DFmode, "df" }
    };
  unsigned int i, j;

  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }

  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* Implement TARGET_GIMPLE_FOLD_BUILTIN.  */
static bool
arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
  tree fndecl = gimple_call_fndecl (stmt);
  unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
  unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
  gimple *new_stmt = NULL;
  switch (code & ARM_BUILTIN_CLASS)
    {
    case ARM_BUILTIN_GENERAL:
      break;
    case ARM_BUILTIN_MVE:
      new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
    }
  if (!new_stmt)
    return false;

  gsi_replace (gsi, new_stmt, true);
  return true;
}
2881 /* On AAPCS systems, this is the "struct __va_list". */
2882 static GTY(()) tree va_list_type
;
2884 /* Return the type to use as __builtin_va_list. */
2886 arm_build_builtin_va_list (void)
2891 if (!TARGET_AAPCS_BASED
)
2892 return std_build_builtin_va_list ();
2894 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2902 The C Library ABI further reinforces this definition in \S
2905 We must follow this definition exactly. The structure tag
2906 name is visible in C++ mangled names, and thus forms a part
2907 of the ABI. The field name may be used by people who
2908 #include <stdarg.h>. */
2909 /* Create the type. */
2910 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2911 /* Give it the required name. */
2912 va_list_name
= build_decl (BUILTINS_LOCATION
,
2914 get_identifier ("__va_list"),
2916 DECL_ARTIFICIAL (va_list_name
) = 1;
2917 TYPE_NAME (va_list_type
) = va_list_name
;
2918 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2919 /* Create the __ap field. */
2920 ap_field
= build_decl (BUILTINS_LOCATION
,
2922 get_identifier ("__ap"),
2924 DECL_ARTIFICIAL (ap_field
) = 1;
2925 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2926 TYPE_FIELDS (va_list_type
) = ap_field
;
2927 /* Compute its layout. */
2928 layout_type (va_list_type
);
2930 return va_list_type
;
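/* Illustrative note (added, not from the original source): because the tag
   name is part of the ABI, a hypothetical C++ declaration

     void f (va_list ap);

   is expected to mangle on AAPCS targets as _Z1fSt9__va_list (the AAPCS C++
   ABI treats va_list as std::__va_list for mangling purposes), rather than
   encoding a pointer type.  */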
2933 /* Return an expression of type "void *" pointing to the next
2934 available argument in a variable-argument list. VALIST is the
2935 user-level va_list object, of type __builtin_va_list. */
2937 arm_extract_valist_ptr (tree valist
)
2939 if (TREE_TYPE (valist
) == error_mark_node
)
2940 return error_mark_node
;
2942 /* On an AAPCS target, the pointer is stored within "struct
2944 if (TARGET_AAPCS_BASED
)
2946 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2947 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2948 valist
, ap_field
, NULL_TREE
);
2954 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2956 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2958 valist
= arm_extract_valist_ptr (valist
);
2959 std_expand_builtin_va_start (valist
, nextarg
);
2962 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2964 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2967 valist
= arm_extract_valist_ptr (valist
);
2968 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling "
	     "for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when "
	     "compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
	 MOVT.  */
      if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets.  */
      if (target_pure_code && common_unsupported_modes)
	error ("%s only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}
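/* Worked example (added, not from the original source): with the Thumb-2
   settings above, a single anchor covers offsets [-248, 4095], i.e.
   248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, which is the
   "divisible by eight" property the comment refers to.  */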
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts,
				     struct gcc_options *opts_set)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_override_options_after_change_1 (&global_options, &global_options_set);
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options */* opts */,
		    struct gcc_options */* opts_set */,
		    struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, false);
  arm_option_reconfigure_globals ();
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts, opts_set);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 e.g., -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB");  */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

  if (arm_stack_protector_guard == SSP_GLOBAL
      && opts->x_arm_stack_protector_guard_offset_str)
    error ("incompatible options %<-mstack-protector-guard=global%> and "
	   "%<-mstack-protector-guard-offset=%s%>",
	   arm_stack_protector_guard_offset_str);

  if (opts->x_arm_stack_protector_guard_offset_str)
    {
      char *end;
      const char *str = arm_stack_protector_guard_offset_str;
      errno = 0;
      long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
      if (!*str || *end || errno)
	error ("%qs is not a valid offset in %qs", str,
	       "-mstack-protector-guard-offset=");
      arm_stack_protector_guard_offset = offs;
    }
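  /* Usage sketch (added, not from the original source): strtol is called
     with base 0, so the offset accepts decimal, octal, and hexadecimal
     spellings, e.g. a hypothetical

       -mstack-protector-guard-offset=0x18

     and the error above fires for an empty string, trailing junk, or an
     out-of-range value (errno set by strtol).  */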
  if (arm_current_function_pac_enabled_p ())
    {
      if (!arm_arch8m_main)
	error ("This architecture does not support branch protection "
	       "instructions");
      if (TARGET_TPCS_FRAME)
	sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
    }

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (opts->x_arm_branch_protection_string)
    {
      aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);

      if (aarch_ra_sign_key != AARCH_KEY_A)
	{
	  warning (0, "invalid key type for %<-mbranch-protection=%>");
	  aarch_ra_sign_key = AARCH_KEY_A;
	}
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* If the user (or the default configuration) has specified a
	     specific FPU, then ignore any bits that depend on the FPU
	     configuration.  Do similarly if using the soft-float
	     ABI.  */
	  if (opts->x_arm_fpu_index != TARGET_FPU_auto
	      || arm_float_abi == ARM_FLOAT_ABI_SOFT)
	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with switch %<-march=%s%>",
			 opts->x_arm_cpu_string,
			 opts->x_arm_arch_string);

	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      /* This should clear out ALL bits relating to the FPU/simd
	 extensions, to avoid potentially invalid combinations later on
	 that we can't match.  At present we only clear out those bits
	 that can be set by -mfpu.  This should be fixed in GCC-12.  */
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  /* If we have the soft-float ABI, clear any feature bits relating to use of
     floating-point operations.  They'll just confuse things later on.  */
  if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
    bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);

  /* There may be implied bits which we still need to enable.  These are
     non-named features which are needed to complete other sets of features,
     but cannot be enabled from arm-cpus.in due to being shared between
     multiple fgroups.  Each entry in all_implied_fbits is of the form
     ante -> cons, meaning that if the feature "ante" is enabled, we should
     implicitly enable "cons".  */
  const struct fbit_implication *impl = all_implied_fbits;
  while (impl->ante)
    {
      if (bitmap_bit_p (target->isa, impl->ante))
	bitmap_set_bit (target->isa, impl->cons);
      impl++;
    }
3528 if (!arm_selected_tune
)
3529 arm_selected_tune
= arm_selected_cpu
;
3530 else /* Validate the features passed to -mtune. */
3531 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3533 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3535 /* Finish initializing the target structure. */
3536 if (!target
->arch_name
)
3537 target
->arch_name
= arm_selected_arch
->common
.name
;
3538 target
->arch_pp_name
= arm_selected_arch
->arch
;
3539 target
->base_arch
= arm_selected_arch
->base_arch
;
3540 target
->profile
= arm_selected_arch
->profile
;
3542 target
->tune_flags
= tune_data
->tune_flags
;
3543 target
->tune
= tune_data
->tune
;
3544 target
->tune_core
= tune_data
->scheduler
;
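
/* A worked example of the flow above (illustrative; the option spelling is
   the documented -mcpu syntax, the rest follows the code): given
   "-mcpu=cortex-a53+nofp", arm_parse_cpu_option_name resolves "cortex-a53"
   against all_cores, cpu_opts points at the "+nofp" suffix (everything from
   the first '+'), and arm_parse_option_features applies that suffix to the
   ISA bitmap, clearing the FP feature bits before the FPU and tuning
   settings are finalized.  */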
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!OPTION_SET_P (arm_fpu_index))
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options, &global_options_set);
  arm_configure_build_target (&arm_active_target, &opts, true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!OPTION_SET_P (arm_structure_size_boundary))
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!OPTION_SET_P (arm_pic_data_is_text_relative))
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP)
    target_word_relocations = 1;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable fix_vlldm by default if required.  */
  if (fix_vlldm == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
	fix_vlldm = 1;
      else
	fix_vlldm = 0;
    }

  /* Enable fix_aes by default if required.  */
  if (fix_aes_erratum_1742098 == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
	fix_aes_erratum_1742098 = 1;
      else
	fix_aes_erratum_1742098 = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_gcse_unrestricted_cost, 2);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_simultaneous_prefetches,
			 current_tune->prefetch.num_slots);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_line_size,
			 current_tune->prefetch.l1_cache_line_size);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    {
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
    }
  else
    {
      /* For a generic ARM target, JF Bastien proposed using 64 for both.  */
      /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
	 constructive?  */
      /* More recent Cortex chips have a 64-byte cache line, but are marked
	 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size, 64);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size, 64);
    }

  if (current_tune->prefetch.l1_cache_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_size,
			 current_tune->prefetch.l1_cache_size);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_sched_autopref_queue_depth,
		       sched_autopref_queue_depth);

  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options,
					   &global_options_set);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options, &global_options_set);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
				    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch8m_main = arm_arch7 && arm_arch_cmse;
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  arm_arch_cde = 0;
  arm_arch_cde_coproc = 0;
  int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
		    isa_bit_cdecp6, isa_bit_cdecp7};
  for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
    {
      int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
      if (cde_bit)
	{
	  arm_arch_cde |= cde_bit;
	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
	}
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_TPIDRURO;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
    error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
}
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("%<__fp16%> and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
	    error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */

static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg *ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
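
/* For reference, the table and lookup above are driven from user code via
   the documented interrupt attribute (a sketch):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   The string is matched exactly against isr_attribute_args, an omitted
   argument defaults to IRQ, and an unknown string makes arm_isr_value
   return ARM_FT_UNKNOWN.  */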
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
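
/* Sketch of the user-level trigger for the check above (the attribute is
   the documented GCC one):

     void reset_vector (void) __attribute__ ((naked));

   Such a function's body, including its return sequence, is written in
   inline asm, so the missing-return warning is suppressed for it.  */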
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  maybe_emit_call_builtin___clear_cache (a_tramp,
					 plus_constant (ptr_mode,
							a_tramp,
							TRAMPOLINE_SIZE));
}
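
/* Offset sketch for the non-FDPIC branch above (derived from the template;
   illustrative): in Arm/Thumb-2 mode the template's two instructions occupy
   bytes 0-7, so the static chain value lands at offset 8 and the function
   address at offset 12; the longer Thumb-1 stub pushes those slots out to
   offsets 12 and 16, matching the TARGET_32BIT ? 8 : 12 selections.  */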
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if REG needs to be saved.  For interrupt handlers, this
   includes call-clobbered registers too.  If this is a leaf function
   we can just examine the registers used by the RTL, but otherwise we
   have to assume that whatever function is called might clobber
   anything, and so we have to save all the call-clobbered registers
   as well.  */
static inline bool reg_needs_saving_p (unsigned reg)
{
  unsigned long func_type = arm_current_func_type ();

  if (IS_INTERRUPT (func_type))
    if (df_regs_ever_live_p (reg)
	/* Save call-clobbered core registers.  */
	|| (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)
	    && reg < FIRST_VFP_REGNUM))
      return true;
    else
      return false;
  else
    if (!df_regs_ever_live_p (reg)
	|| call_used_or_fixed_reg_p (reg))
      return false;
    else
      return true;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  /* Never use a return instruction when return address signing
     mechanism is enabled as it requires more than one
     instruction.  */
  if (arm_current_function_pac_enabled_p ())
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  Armv8.1-M Mainline
     also needs several instructions to save and restore FP context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_VFP_BASE)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
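
/* Worked examples for the encoding rules above (illustrative):
     0x000000ff	 valid everywhere (8 bits, no shift)
     0x0000ff00	 valid in ARM mode (0xff rotated right by 24, an even amount)
     0x000001fe	 invalid in ARM mode (the 8-bit field sits at an odd
		 rotation) but valid in Thumb-2, which allows any shift
     0x00ff00ff	 invalid in ARM mode, but a valid Thumb-2 replicated
		 pattern (0x00XY00XY).  */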
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
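
/* Worked examples for const_ok_for_op (illustrative): (AND x 0xffffff00)
   is accepted because ~0xffffff00 == 0xff is a valid immediate (the AND
   can become BIC); similarly (SET x 0xffffff00) is accepted through the
   MVN path when the movw test fails.  */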
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	      && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/Thumb-2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
					 best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
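
/* A worked example (illustrative): for val = 0x01F001E0 in Thumb-2 the
   loop above emits 0x01F00000 and then 0x000001E0, two shifted 8-bit
   immediates; this is the 0x01F001E0 case cited in the comment above,
   where a replicated constant would only make the sequence worse.  */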
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode,
							    GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source,
								   sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12
      */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.  */
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}
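
/* Illustrative example (not from the original source): the DImode
   constant 0x000000ff000000ff satisfies the test above, since both its
   low word (0xff) and its high word (0xff) are valid ARM immediates,
   so an RSB/RSC pair can operate on it directly.  */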
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
     ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
     either reversed or (for constant OP1) adjusted to GE/LT.
     Similarly for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || *code == GTU || *code == LEU)
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval)
		    {
		      /* Try to convert to GE/LT, unless that would be more
			 expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		    }
		  else
		    {
		      /* GT maxval is always false, LE maxval is always true.
			 We can't fold that away here as we must make a
			 comparison, but we can fold them to comparisons
			 with the same result that can be handled:
			   op0 GT maxval -> op0 LT minval
			   op0 LE maxval -> op0 GE minval
			 where minval = (-maxval - 1).  */
		      *op1 = GEN_INT (-maxval - 1);
		      *code = *code == GT ? LT : GE;
		    }
		  return;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0))
		    {
		      /* Try to convert to GEU/LTU, unless that would
			 be more expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		    }
		  else
		    {
		      /* GTU ~0 is always false, LEU ~0 is always true.
			 We can't fold that away here as we must make a
			 comparison, but we can fold them to comparisons
			 with the same result that can be handled:
			   op0 GTU ~0 -> op0 LTU 0
			   op0 LEU ~0 -> op0 GEU 0.  */
		      *op1 = const0_rtx;
		      *code = *code == GTU ? LTU : GEU;
		    }
		  return;

		default:
		  gcc_unreachable ();
		}
	    }

	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int) swap_condition ((enum rtx_code) *code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      break;
    }
}
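
/* Worked example (illustrative, not from the original source): for an
   SImode comparison "x > 1023", 1023 is not a valid ARM immediate but
   1024 is, so the GT/LE case above rewrites it as "x >= 1024"
   (GT -> GE with *op1 incremented).  */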
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers except in case of MVE, because in
	 MVE we will be using the hard-float ABI on a CPU which doesn't support
	 single-precision operations in hardware.  In MVE the following check
	 enables use of emulation for the single-precision arithmetic
	 operations.  */
      if (TARGET_HAVE_MVE)
	{
	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
	}

      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type)
      && (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

#ifndef ARM_WINCE
  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */

  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      /* NOTE: This code is deprecated and has not been updated to handle
	 DECL_FIELD_ABI_IGNORED.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
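
/* Illustrative examples of the APCS rules above (not from the original
   source): "struct { int x; }" is integer-like, so it is returned in
   r0, whereas "struct { int x, y; }" is larger than one word and is
   returned in memory.  */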
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized argument.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
#if 0
      /* Unfortunately, this is not safe and can lead to wrong code
	 being generated (PR96882).  Not all calls into the back-end
	 pass the DECL, so it is unsafe to make any PCS-changing
	 decisions based on it.  In particular the RETURN_IN_MEMORY
	 hook is only ever passed a TYPE.  This needs revisiting to
	 see if there are any partial improvements that can be
	 re-enabled.  */
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_node *local_info_node
	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
	  if (local_info_node && local_info_node->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
#endif
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Bitmasks that indicate whether earlier versions of GCC would have
   taken a different path through the ABI logic.  This should result in
   a -Wpsabi warning if the earlier path led to a different ABI decision.

   WARN_PSABI_EMPTY_CXX17_BASE
      Indicates that the type includes an artificial empty C++17 base field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  See PR94711 for details.

   WARN_PSABI_NO_UNIQUE_ADDRESS
      Indicates that the type includes an empty [[no_unique_address]] field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  */
const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.

   The WARN_PSABI_FLAGS argument allows the caller to check whether this
   function has changed its behavior relative to earlier versions of GCC.
   Normally the argument should be nonnull and point to a zero-initialized
   variable.  The function then records whether the ABI decision might
   be affected by a known fix to the ABI logic, setting the associated
   WARN_PSABI_* bits if so.

   When the argument is instead a null pointer, the function tries to
   simulate the behavior of GCC before all such ABI fixes were made.
   This is useful to check whether the function returns something
   different after the ABI fixes.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
			 unsigned int *warn_psabi_flags)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
					 warn_psabi_flags);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    if (DECL_FIELD_ABI_IGNORED (field))
	      {
		/* See whether this is something that earlier versions of
		   GCC failed to ignore.  */
		unsigned int flag;
		if (lookup_attribute ("no_unique_address",
				      DECL_ATTRIBUTES (field)))
		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
		else if (cxx17_empty_base_field_p (field))
		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
		else
		  /* No compatibility problem.  */
		  continue;

		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= flag;
		    continue;
		  }
	      }
	    /* A zero-width bitfield may affect layout in some
	       circumstances, but adds no members.  The determination
	       of whether or not a type is an HFA is performed after
	       layout is complete, so if the type still looks like an
	       HFA afterwards, it is still classed as one.  This is
	       potentially an ABI break for the hard-float ABI.  */
	    else if (DECL_BIT_FIELD (field)
		     && integer_zerop (DECL_SIZE (field)))
	      {
		/* Prior to GCC-12 these fields were stripped early,
		   hiding them from the back-end entirely and
		   resulting in the correct behaviour for argument
		   passing.  Simulate that old behaviour without
		   generating a warning.  */
		if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
		  continue;
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
		    continue;
		  }
	      }

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
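
/* Illustrative example (not from the original source): for
     struct vec3 { float x, y, z; };
   the walk above sets *MODEP to SFmode and returns 3, making vec3 a
   homogeneous floating-point aggregate that the VFP PCS can pass in
   s0-s2.  */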
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 %<hard-float%> VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT
	  && (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      unsigned int warn_psabi_flags = 0;
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
					      &warn_psabi_flags);
      if (ag_count > 0 && ag_count <= 4)
	{
	  static unsigned last_reported_type_uid;
	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
	  int alt;
	  if (warn_psabi
	      && warn_psabi_flags
	      && uid != last_reported_type_uid
	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
		  != ag_count))
	    {
	      const char *url10
		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
	      const char *url12
		= CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
	      gcc_assert (alt == -1);
	      last_reported_type_uid = uid;
	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
		 qualification.  */
	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
		inform (input_location, "parameter passing for argument of "
			"type %qT with %<[[no_unique_address]]%> members "
			"changed %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
		inform (input_location, "parameter passing for argument of "
			"type %qT when C++17 is enabled changed to match "
			"C++14 %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
		inform (input_location, "parameter passing for argument of "
			"type %qT changed %{in GCC 12.1%}",
			TYPE_MAIN_VARIANT (type), url12);
	    }
	  *count = ag_count;
	}
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
	   type);

  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
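
/* Illustrative allocation (not from the original source): for
     void f (float a, double b, float c);
   "a" takes s0, "b" needs an aligned pair and takes d1 (s2/s3), and
   "c" back-fills the still-free s1 -- exactly the behaviour the
   free-register mask scan above implements.  */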
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!(TARGET_NEON || TARGET_HAVE_MVE))
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}

      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode)
     can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.
     This routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2 (mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
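
/* Worked example of the rules above (illustrative, not from the
   original source): for
     void f (int a, long long b);
   "a" is allocated r0 by rule C4; rule C3 then rounds the NCRN up from
   1 to 2, so the doubleword "b" occupies the even pair r2/r3.  Any
   further argument would find the core registers exhausted and go on
   the stack (C6-C8).  */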
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.

     Note that we explicitly consider zero-sized fields here, even though
     they don't map to AAPCS machine types.  For example, in:

       struct __attribute__((aligned(8))) empty {};

       struct s {
	 [[no_unique_address]] empty e;
	 int x;
       };

     "s" contains only one Fundamental Data Type (the int field)
     but gains 8-byte alignment and size thanks to "e".  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 2;

  if (ret2)
    return ret2;

  return ret;
}
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}

static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}
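
/* Explanatory note (not from the original source): this matters for
   types whose size is not a compile-time constant, e.g. objects of
   variable-length array type in GNU C or dynamically-sized types in
   other front ends.  Their TYPE_SIZE is not an INTEGER_CST, so they
   are passed as a pointer to a caller-made copy rather than by
   value.  */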
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
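
/* Example usage from user code (illustrative, not from the original
   source):

     #pragma long_calls
     void far_away (void);    (declared here: its type gets "long_call")
     #pragma long_calls_off

   arm_set_default_type_attributes below applies the recorded pragma
   state to each function type declared in its scope.  */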
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      else if (TARGET_VFP_BASE)
	{
	  warning (OPT_Wattributes, "FP registers might be clobbered despite "
		   "%qE attribute: compile with %<-mgeneral-regs-only%>",
		   name);
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (FUNC_OR_METHOD_TYPE_P (*node))
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    warning (OPT_Wattributes, "%qE attribute ignored",
		     name);
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }

  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE;
  tree fntype, type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  if (DECL_P (*node))
    {
      fntype = TREE_TYPE (*node);

      if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
	decl = *node;
    }
  else
    fntype = *node;

  while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  if (decl)
    {
      type = build_distinct_type_copy (TREE_TYPE (decl));
      TREE_TYPE (decl) = type;
    }
  else
    {
      type = build_distinct_type_copy (*node);
      *node = type;
    }

  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  tree attrs1 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type1));
  tree attrs2 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type2));
  if (bool (attrs1) != bool (attrs2))
    return 0;
  if (attrs1 && !attribute_value_equal (attrs1, attrs2))
    return 0;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
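
/* For illustration (not from the original source): two function types
   declared as

     typedef void fl (void) __attribute__ ((long_call));
     typedef void fs (void) __attribute__ ((short_call));

   compare as incompatible here (result 0), since one has long_call
   where the other has short_call and mixed attributes are disallowed.  */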
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */

static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (FUNC_OR_METHOD_TYPE_P (type))
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
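
/* Illustrative example (not from the original source): given

     void far_helper (void) __attribute__ ((long_call));

   a call to far_helper is reported as a long call here even without
   -mlong-calls, while a short_call attribute or placement in the
   current function's section forces a direct BL instead.  */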
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* In FDPIC, never tailcall something for which we have no decl:
     the target function could be in a different module, requiring
     a different FDPIC register value.  */
  if (TARGET_FDPIC && !decl)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers.  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    {
	      function_arg_info arg (type, /*named=*/true);
	      arm_function_arg_advance (cum_v, arg);
	    }
	}

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (SYMBOL_REF_P (x)
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
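
/* For illustration (not from the original source): under -fPIC an
   operand such as

     (symbol_ref "external_var")
     (const (plus (symbol_ref "x") (const_int 4)))

   is rejected here (returns 0) and must be legitimized through the GOT,
   while plain integers remain legitimate immediates.  */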
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already done
   so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
   the PIC register is reloaded in the current position of the instruction
   stream regardless of whether it was loaded before.  Otherwise, it is only
   loaded if not already done so (crtl->uses_pic_offset_table is null).  Note
   that nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null
   PIC_REG is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}
/* Generate insns to calculate the address of ORIG in pic mode.  */

static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by register
   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
   location in the instruction stream, regardless of whether it was loaded
   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.

   Returns the register REG into which the PIC load is performed.  */

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
			bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (SYMBOL_REF_P (orig)
      || LABEL_REF_P (orig))
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((LABEL_REF_P (orig)
	   || (SYMBOL_REF_P (orig)
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
	       && (!SYMBOL_REF_FUNCTION_P (orig)
		   || arm_fdpic_local_funcdesc_p (orig))))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register (pic_reg, compute_now);

	  if (pic_reg == NULL_RTX)
	    pic_reg = cfun->machine->pic_reg;

	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
				     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg, pic_reg,
				       compute_now);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
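
/* Worked example (illustrative, not from the original source): for
   ORIG = (const (plus (symbol_ref "arr") (const_int 8))) the recursion
   above legitimizes the symbol through the GOT into a register and then
   re-applies the offset, yielding (plus (reg GOT_slot) (const_int 8))
   when the offset is a valid index for MODE.  */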
/* Generate insns that produce the address of the stack canary.  */

static rtx
arm_stack_protect_tls_canary_mem (bool reload)
{
  rtx tp = gen_reg_rtx (SImode);
  if (reload)
    emit_insn (gen_reload_tp_hard (tp));
  else
    emit_insn (gen_load_tp_hard (tp));

  rtx reg = gen_reg_rtx (SImode);
  rtx offset = GEN_INT (arm_stack_protector_guard_offset);
  emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
  return gen_rtx_MEM (SImode, reg);
}
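
/* Usage sketch (illustrative, not from the original source): this path
   is taken for command lines such as

     gcc -mstack-protector-guard=tls -mstack-protector-guard-offset=8 ...

   where the canary is read from [TP + 8] relative to the thread pointer
   rather than from the global __stack_chk_guard.  */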
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
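
/* Example (illustrative, not from the original source): if r0 and r3
   are the only low registers that are call-clobbered and not live out
   of the prologue, the returned mask is 0b1001, with bit N standing
   for register number FIRST_LO_REGNUM + N.  */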
/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (LABEL_REF_P (orig))
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = VAR_P (decl)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = VAR_P (decl)
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
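
/* For illustration (not from the original source): an expression such
   as

     (minus (symbol_ref "sym") (label_ref L42))

   with a hypothetical label L42 reduces to a PC-relative relocation,
   so it is accepted here.  */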
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && !CONST_INT_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    return true;
  return false;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  /* If we are dealing with a MVE predicate mode, then treat it as a HImode as
     we can store and load it like any other 16-bit value.  */
  if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
    mode = HImode;

  if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
    return mve_vector_mem_operand (mode, x, strict_p);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode
	   || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
	   || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
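
/* Summary example (illustrative, not from the original source): with
   TARGET_HARD_FLOAT, an SFmode access tolerates word-aligned constant
   offsets in (-1024, 1024), so (plus (reg r4) (const_int 516)) is a
   legitimate VLDR address while (const_int 518) is not, because it is
   not a multiple of 4.  */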
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_VFP_BASE
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && SYMBOL_REF_P (x)
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
	   && !arm_disable_literal_pool)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (LABEL_REF_P (x)
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || VIRTUAL_REGISTER_P (XEXP (x, 0)))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && SYMBOL_REF_P (x)
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && !arm_disable_literal_pool
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
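
/* Worked examples (illustrative, not from the original source):

     QImode: offsets 0..31 are valid (LDRB with a 5-bit immediate).
     HImode: even offsets 0..62 (LDRH, immediate scaled by 2).
     SImode: word-aligned offsets 0..124 (LDR, immediate scaled by 4).  */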
/* Worker function for TARGET_LEGITIMATE_ADDRESS_P.  */
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
			  code_helper)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  /* Restore r9.  */
	  emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
							  initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
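
/* Access-pattern sketch (illustrative, not from the original source):
   for

     __thread int counter;

   the local-exec case above computes

     tp   = arm_load_tp ()               (thread pointer)
     addr = tp + <tpoff of counter>      (TLS_LE32 relocation)

   while the global-dynamic case funnels the address computation
   through a call to __tls_get_addr.  */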
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (!SYMBOL_REF_P (x))
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (TARGET_THUMB1)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
	 only use an 8-bit index.  So let's use a 12-bit index for
	 SImode only and hope that arm_gen_constant will enable LDRB
	 to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the
	     base with more bits set and use a negative index instead.
	     Don't do this for Thumb as negative offsets are much more
	     limited.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
    return false;

  return flag_pic || !label_mentioned_p (x);
}
static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
	  /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
	     we build the symbol address with upper/lower
	     relocations.  */
	  || (TARGET_THUMB1
	      && !label_mentioned_p (x)
	      && arm_valid_symbolic_address_p (x)
	      && arm_disable_literal_pool)
	  || flag_pic);
}
static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }

  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
        return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256
              /* 16-bit constant.  */
              || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return arm_disable_literal_pool
            ? COSTS_N_INSNS (8)
            : COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
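/* Illustrative worked example (added commentary, not from the original
   source): under the MEM case above, a DImode access has
   GET_MODE_SIZE == 8 and UNITS_PER_WORD == 4, so the cost is
   10 + 4 * ((8 - 1) / 4) == 14, plus 4 more when the address is a
   constant-pool reference.  */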
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
         defined by RTL expansion, especially for the expansion of
         multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
           && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
          || (GET_CODE (XEXP (x, 1)) == MULT
              && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
        return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* Thumb1 mul instruction can't operate on const.  We must load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          /* For the targets which have a very small and high-latency multiply
             unit, we prefer to synthesize the mult with up to 5 instructions,
             giving a good balance between size and performance.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            return COSTS_N_INSNS (5);
          else
            return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
          || satisfies_constraint_K (SET_SRC (x))
             /* Too big an immediate for a 2-byte mov, using MOVT.  */
          || (CONST_INT_P (SET_SRC (x))
              && UINTVAL (SET_SRC (x)) >= 256
              && TARGET_HAVE_MOVT
              && satisfies_constraint_j (SET_SRC (x)))
             /* thumb1_movdi_insn.  */
          || ((words > 1) && MEM_P (SET_SRC (x))))
        cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* movw is 4byte long.  */
          if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return arm_disable_literal_pool
            ? COSTS_N_INSNS (8)
            : COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
              + COSTS_N_INSNS (1)
                * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
                 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case E_QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case E_HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case E_SImode:
          return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
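/* Illustrative example (added commentary, not from the original source):
   for (mult (reg:SI r0) (const_int 8)), exact_log2 (8) == 3 > 0, so the
   function returns r0 and leaves *SHIFT_REG untouched; for
   (ashift (reg:SI r0) (reg:SI r1)) it returns r0 and sets *SHIFT_REG
   to r1.  */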
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
         use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
                  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
                  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
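/* Illustrative example (added commentary, not from the original source):
   an UNSPEC_UNALIGNED_LOAD of a DImode value has ARM_NUM_REGS == 2, so the
   base cost is COSTS_N_INSNS (2), plus twice the per-load cost and the
   unaligned-load penalty when optimizing for speed.  */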
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
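/* Illustrative expansion (added commentary, not from the original source):
   for a two-argument libcall, LIBCALL_COST (2) is COSTS_N_INSNS (20) when
   optimizing for speed (2 + 18) and COSTS_N_INSNS (4) at -Os (2 + 2).  */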
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
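/* Note (added commentary): the macro above is expanded inline in both the
   PLUS and MINUS cases of arm_rtx_costs_internal; it returns from the
   enclosing function when a narrow-mode shift-and-arith pattern matches,
   so code following an expansion only runs when no shifter operand was
   found.  */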
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
               int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);
  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
        {
        default:
        case REG:
          op_type = AMO_DEFAULT;
          break;
        case MINUS:
          /* MINUS does not appear in RTL, but the architecture supports it,
             so handle this case defensively.  */
          /* fall through */
        case PLUS:
          op_type = AMO_NO_WB;
          break;
        case PRE_INC:
        case PRE_DEC:
        case POST_INC:
        case POST_DEC:
        case PRE_MODIFY:
        case POST_MODIFY:
          op_type = AMO_WB;
          break;
        }

      if (VECTOR_MODE_P (mode))
        *cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
        *cost += current_tune->addr_mode_costs->fp[op_type];
      else
        *cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
        {
          if (GET_MODE_SIZE (mode) == 8)
            *cost += extra_cost->ldst.loadd;
          else
            *cost += extra_cost->ldst.loadf;
        }
      else if (VECTOR_MODE_P (mode))
        *cost += extra_cost->ldst.loadv;
      else
        {
          /* Integer modes */
          if (GET_MODE_SIZE (mode) == 8)
            *cost += extra_cost->ldst.ldrd;
          else
            *cost += extra_cost->ldst.load;
        }
    }

  return true;
}
/* Helper for arm_bfi_p.  */
static bool
arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
{
  unsigned HOST_WIDE_INT const1;
  unsigned HOST_WIDE_INT const2 = 0;

  if (!CONST_INT_P (XEXP (op0, 1)))
    return false;

  const1 = UINTVAL (XEXP (op0, 1));
  if (!CONST_INT_P (XEXP (op1, 1))
      || ~UINTVAL (XEXP (op1, 1)) != const1)
    return false;

  if (GET_CODE (XEXP (op0, 0)) == ASHIFT
      && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
    {
      const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
      *sub0 = XEXP (XEXP (op0, 0), 0);
    }
  else
    *sub0 = XEXP (op0, 0);

  if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return false;

  *sub1 = XEXP (op1, 0);
  return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
}
/* Recognize a BFI idiom.  Helper for arm_rtx_costs_internal.  The
   format looks something like:

   (IOR (AND (reg1) (~const1))
	(AND (ASHIFT (reg2) (const2))
	     (const1)))

   where const1 is a consecutive sequence of 1-bits with the
   least-significant non-zero bit starting at bit position const2.  If
   const2 is zero, then the shift will not appear at all, due to
   canonicalization.  The two arms of the IOR expression may be
   flipped.  */
static bool
arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
{
  if (GET_CODE (x) != IOR)
    return false;
  if (GET_CODE (XEXP (x, 0)) != AND
      || GET_CODE (XEXP (x, 1)) != AND)
    return false;
  return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
          || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
}
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
                        const struct cpu_cost_table *extra_cost,
                        int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
        *cost = thumb1_rtx_costs (x, code, outer_code);
      else
        *cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
          && REG_P (SET_DEST (x)))
        {
          /* Assume that most copies can be done with a single insn,
             unless we don't have HW FP, in which case everything
             larger than word mode will require two insns.  */
          *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
                                   && GET_MODE_SIZE (mode) > 4)
                                  ? 2 : 1));
          /* Conditional register moves can be encoded
             in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
            *cost >>= 1;

          return true;
        }

      if (CONST_INT_P (SET_SRC (x)))
        {
          /* Handle CONST_INT here, since the value doesn't have a mode
             and we would otherwise be unable to work out the true cost.  */
          *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
                            0, speed_p);
          outer_code = SET;
          /* Slightly lower the cost of setting a core reg to a constant.
             This helps break up chains and allows for better scheduling.  */
          if (REG_P (SET_DEST (x))
              && REGNO (SET_DEST (x)) <= LR_REGNUM)
            *cost -= 1;
          x = SET_SRC (x);
          /* Immediate moves with an immediate in the range [0, 255] can be
             encoded in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
              && INTVAL (x) >= 0 && INTVAL (x) <= 255)
            *cost >>= 1;
          goto const_int_cost;
        }

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);
    case PARALLEL:
    {
   /* Calculations of LDM costs are complex.  We assume an initial cost
   (ldm_1st) which will load the number of registers mentioned in
   ldm_regs_per_insn_1st registers; then each additional
   ldm_regs_per_insn_subsequent registers cost one more insn.  The
   formula for N regs is thus:

   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
			     + ldm_regs_per_insn_subsequent - 1)
			    / ldm_regs_per_insn_subsequent).

   Additional costs may also be added for addressing.  A similar
   formula is used for STM.  */
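   /* Illustrative worked example (added commentary, not from the original
      source): with N = 5, ldm_regs_per_insn_1st = 2 and
      ldm_regs_per_insn_subsequent = 2, the extra term is
      COSTS_N_INSNS ((max (5 - 2, 0) + 2 - 1) / 2) == COSTS_N_INSNS (2)
      on top of ldm_1st.  */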
      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
        {
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
				? extra_cost->ldst.ldm_regs_per_insn_1st
				: extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			   ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			   : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	      return true;
	    }
	}
      return false;
    }
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *cost += COSTS_N_INSNS (speed_p
                                ? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
        *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
        *cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
         possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;  /* All arguments must be in registers.  */
    case MOD:
      /* MOD by a power of 2 can be expanded as:
         rsbs    r1, r0, #0
         and     r0, r0, #(n - 1)
         and     r1, r1, #(n - 1)
         rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
          && exact_log2 (INTVAL (XEXP (x, 1))) > 0
          && mode == SImode)
        {
          *cost += COSTS_N_INSNS (3);

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical
                     + extra_cost->alu.arith;
          return true;
        }

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
         possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;  /* All arguments must be in registers.  */
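    /* Illustrative example (added commentary, not from the original
       source): for x % 8 the divisor is a power of two
       (exact_log2 (8) == 3 > 0), so the expansion above applies with
       #(n - 1) == #7.  */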
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
        {
          *cost += (COSTS_N_INSNS (1)
                    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
          if (speed_p)
            *cost += extra_cost->alu.shift_reg;
          return true;
        }
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
          *cost += (COSTS_N_INSNS (2)
                    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
          /* Slightly disparage left shift by 1 so we prefer adddi3.  */
          if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
            *cost += 1;
          return true;
        }
      else if (mode == SImode)
        {
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
          /* Slightly disparage register shifts at -Os, but not by much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_SIZE (mode) < 4)
        {
          if (code == ASHIFT)
            {
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              /* Slightly disparage register shifts at -Os, but not by
                 much.  */
              if (!CONST_INT_P (XEXP (x, 1)))
                *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                          + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
            }
          else if (code == LSHIFTRT || code == ASHIFTRT)
            {
              if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
                {
                  /* Can use SBFX/UBFX.  */
                  if (speed_p)
                    *cost += extra_cost->alu.bfx;
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                }
              else
                {
                  *cost += COSTS_N_INSNS (1);
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                  if (speed_p)
                    {
                      if (CONST_INT_P (XEXP (x, 1)))
                        *cost += 2 * extra_cost->alu.shift;
                      else
                        *cost += (extra_cost->alu.shift
                                  + extra_cost->alu.shift_reg);
                    }
                  else
                    /* Slightly disparage register shifts.  */
                    *cost += !CONST_INT_P (XEXP (x, 1));
                }
            }
          else /* Rotates.  */
            {
              *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              if (speed_p)
                {
                  if (CONST_INT_P (XEXP (x, 1)))
                    *cost += (2 * extra_cost->alu.shift
                              + extra_cost->alu.log_shift);
                  else
                    *cost += (extra_cost->alu.shift
                              + extra_cost->alu.shift_reg
                              + extra_cost->alu.log_shift_reg);
                }
            }
          return true;
        }

      *cost = LIBCALL_COST (2);
      return false;
    case BSWAP:
      if (arm_arch6)
        {
          if (mode == SImode)
            {
              if (speed_p)
                *cost += extra_cost->alu.rev;

              return false;
            }
        }
      else
        {
          /* No rev instruction available.  Look at arm_legacy_rev
             and thumb_legacy_rev for the form of RTL used then.  */
          if (TARGET_THUMB)
            {
              *cost += COSTS_N_INSNS (9);

              if (speed_p)
                {
                  *cost += 6 * extra_cost->alu.shift;
                  *cost += 3 * extra_cost->alu.logical;
                }
            }
          else
            {
              *cost += COSTS_N_INSNS (4);

              if (speed_p)
                {
                  *cost += 2 * extra_cost->alu.shift;
                  *cost += extra_cost->alu.arith_shift;
                  *cost += 2 * extra_cost->alu.logical;
                }
            }
          return true;
        }
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT
              || GET_CODE (XEXP (x, 1)) == MULT)
            {
              rtx mul_op0, mul_op1, sub_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              if (GET_CODE (XEXP (x, 0)) == MULT)
                {
                  mul_op0 = XEXP (XEXP (x, 0), 0);
                  mul_op1 = XEXP (XEXP (x, 0), 1);
                  sub_op = XEXP (x, 1);
                }
              else
                {
                  mul_op0 = XEXP (XEXP (x, 1), 0);
                  mul_op1 = XEXP (XEXP (x, 1), 1);
                  sub_op = XEXP (x, 0);
                }

              /* The first operand of the multiply may be optionally
                 negated.  */
              if (GET_CODE (mul_op0) == NEG)
                mul_op0 = XEXP (mul_op0, 0);

              *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
                        + rtx_cost (mul_op1, mode, code, 0, speed_p)
                        + rtx_cost (sub_op, mode, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }
      if (mode == SImode)
        {
          rtx shift_by_reg = NULL;
          rtx shift_op;
          rtx non_shift_op;
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);

          /* Factor out any borrow operation.  There's more than one way
             of expressing this; try to recognize them all.  */
          if (GET_CODE (op0) == MINUS)
            {
              if (arm_borrow_operation (op1, SImode))
                {
                  op1 = XEXP (op0, 1);
                  op0 = XEXP (op0, 0);
                }
              else if (arm_borrow_operation (XEXP (op0, 1), SImode))
                op0 = XEXP (op0, 0);
            }
          else if (GET_CODE (op1) == PLUS
                   && arm_borrow_operation (XEXP (op1, 0), SImode))
            op1 = XEXP (op1, 0);
          else if (GET_CODE (op0) == NEG
                   && arm_borrow_operation (op1, SImode))
            {
              /* Negate with carry-in.  For Thumb2 this is done with
                 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
                 RSC instruction that exists in Arm mode.  */
              if (speed_p)
                *cost += (TARGET_THUMB2
                          ? extra_cost->alu.arith_shift
                          : extra_cost->alu.arith);
              *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
              return true;
            }
          /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
             Note we do mean ~borrow here.  */
          else if (TARGET_ARM && arm_carry_operation (op0, SImode))
            {
              *cost += rtx_cost (op1, mode, code, 1, speed_p);
              return true;
            }

          shift_op = shifter_op_p (op0, &shift_by_reg);
          if (shift_op == NULL)
            {
              shift_op = shifter_op_p (op1, &shift_by_reg);
              non_shift_op = op0;
            }
          else
            non_shift_op = op1;

          if (shift_op != NULL)
            {
              if (shift_by_reg != NULL)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
              *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
              return true;
            }
          if (arm_arch_thumb2
              && GET_CODE (XEXP (x, 1)) == MULT)
            {
              /* MLS.  */
              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
              return true;
            }

          if (CONST_INT_P (op0))
            {
              int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
                                            INTVAL (op0), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
              return true;
            }
          else if (speed_p)
            *cost += extra_cost->alu.arith;

          /* Don't recurse as we don't want to cost any borrow that
             we've stripped.  */
          *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
          *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
          return true;
        }
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          /* We check both sides of the MINUS for shifter operands since,
             unlike PLUS, it's not commutative.  */

          HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
          HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

          /* Slightly disparage, as we might need to widen the result.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;

          if (CONST_INT_P (XEXP (x, 0)))
            {
              *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
              return true;
            }

          return false;
        }
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
            {
              rtx op1 = XEXP (x, 1);

              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;

              if (GET_CODE (op1) == ZERO_EXTEND)
                *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
                                   0, speed_p);
              else
                *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
                                 0, speed_p);
              return true;
            }
          else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
              return true;
            }
          else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));
              *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
                                    GET_CODE (XEXP (x, 1)), 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              rtx mul_op0, mul_op1, add_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              mul_op0 = XEXP (XEXP (x, 0), 0);
              mul_op1 = XEXP (XEXP (x, 0), 1);
              add_op = XEXP (x, 1);

              *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
                        + rtx_cost (mul_op1, mode, code, 0, speed_p)
                        + rtx_cost (add_op, mode, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }
      /* Narrow modes can be synthesized in SImode, but the range
         of useful sub-operations is limited.  Check for shift operations
         on one of the operands.  Only left shifts can be used in the
         narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              /* Slightly penalize a narrow operation as the result may
                 need widening.  */
              *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
              return true;
            }

          /* Slightly penalize a narrow operation as the result may
             need widening.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }
      if (mode == SImode)
        {
          rtx shift_op, shift_reg;

          if (TARGET_INT_SIMD
              && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
            {
              /* UXTA[BH] or SXTA[BH].  */
              if (speed_p)
                *cost += extra_cost->alu.extend_arith;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
              return true;
            }

          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);

          /* Handle a side effect of adding in the carry to an addition.  */
          if (GET_CODE (op0) == PLUS
              && arm_carry_operation (op1, mode))
            {
              op1 = XEXP (op0, 1);
              op0 = XEXP (op0, 0);
            }
          else if (GET_CODE (op1) == PLUS
                   && arm_carry_operation (op0, mode))
            {
              op0 = XEXP (op1, 0);
              op1 = XEXP (op1, 1);
            }
          else if (GET_CODE (op0) == PLUS)
            {
              op0 = strip_carry_operation (op0);
              if (swap_commutative_operands_p (op0, op1))
                std::swap (op0, op1);
            }

          if (arm_carry_operation (op0, mode))
            {
              /* Adding the carry to a register is a canonicalization of
                 adding 0 to the register plus the carry.  */
              if (speed_p)
                *cost += extra_cost->alu.arith;
              *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
              return true;
            }

          shift_reg = NULL;
          shift_op = shifter_op_p (op0, &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
                        + rtx_cost (op1, mode, PLUS, 1, speed_p));
              return true;
            }

          if (GET_CODE (op0) == MULT)
            {
              rtx mul_op = op0;

              if (TARGET_DSP_MULTIPLY
                  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
                       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                           || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                               && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                               && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
                      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
                          && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
                          && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
                          && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                              || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                                  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                                  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
                                      == 16))))))
                {
                  /* SMLA[BT][BT].  */
                  if (speed_p)
                    *cost += extra_cost->mult[0].extend_add;
                  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
                                      SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
                                        SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (op1, mode, PLUS, 1, speed_p));
                  return true;
                }

              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
                        + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
                        + rtx_cost (op1, mode, PLUS, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (op1))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (op1), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.arith;

          /* Don't recurse here because we want to test the operands
             without any carry operation.  */
          *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
          *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
          return true;
        }
      if (mode == DImode)
        {
          if (GET_CODE (XEXP (x, 0)) == MULT
              && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
                  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
                      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
            {
              if (speed_p)
                *cost += extra_cost->mult[1].extend_add;
              *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
                                    ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          *cost += COSTS_N_INSNS (1);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));

              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      {
        rtx sub0, sub1;
        if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
          {
            if (speed_p)
              *cost += extra_cost->alu.rev;

            return true;
          }
        else if (mode == SImode && arm_arch_thumb2
                 && arm_bfi_p (x, &sub0, &sub1))
          {
            *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
            *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
            if (speed_p)
              *cost += extra_cost->alu.bfi;

            return true;
          }
      }
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));
          rtx op0 = XEXP (x, 0);
          rtx shift_op, shift_reg;

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          shift_reg = NULL;
          shift_op = shifter_op_p (op0, &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;

              *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (code, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);

              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.logical;
              *cost += rtx_cost (op0, mode, code, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          *cost += (rtx_cost (op0, mode, code, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
          return true;
        }
      if (mode == DImode)
        {
          rtx op0 = XEXP (x, 0);
          enum rtx_code subcode = GET_CODE (op0);

          *cost += COSTS_N_INSNS (1);

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          if (GET_CODE (op0) == ZERO_EXTEND)
            {
              if (speed_p)
                *cost += 2 * extra_cost->alu.logical;

              *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
              return true;
            }
          else if (GET_CODE (op0) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

              *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical;

          return true;
        }
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          rtx op0 = XEXP (x, 0);

          if (GET_CODE (op0) == NEG && !flag_rounding_math)
            op0 = XEXP (op0, 0);

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].mult;

          *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }

      if (mode == SImode)
        {
          if (TARGET_DSP_MULTIPLY
              && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                           && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
                  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
                      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
                      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
                      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                          || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                              && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                              && (INTVAL (XEXP (XEXP (x, 1), 1))
                                  == 16))))))
            {
              /* SMUL[TB][TB].  */
              if (speed_p)
                *cost += extra_cost->mult[0].extend;
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
                                 SIGN_EXTEND, 0, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
                                 SIGN_EXTEND, 1, speed_p);
              return true;
            }
          if (speed_p)
            *cost += extra_cost->mult[0].simple;
          return false;
        }

      if (mode == DImode)
        {
          if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
            {
              if (speed_p)
                *cost += extra_cost->mult[1].extend;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
                                    ZERO_EXTEND, 0, speed_p));
              return true;
            }

          *cost = LIBCALL_COST (2);
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              /* VNMUL.  */
              *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;

          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
          return false;
        }

      if (mode == SImode)
        {
          if (GET_CODE (XEXP (x, 0)) == ABS)
            {
              *cost += COSTS_N_INSNS (1);
              /* Assume the non-flag-changing variant.  */
              if (speed_p)
                *cost += (extra_cost->alu.log_shift
                          + extra_cost->alu.arith_shift);
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
              return true;
            }

          if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
            {
              *cost += COSTS_N_INSNS (1);
              /* No extra cost for MOV imm and MVN imm.  */
              /* If the comparison op is using the flags, there's no further
                 cost, otherwise we need to add the cost of the comparison.  */
              if (!(REG_P (XEXP (XEXP (x, 0), 0))
                    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
                    && XEXP (XEXP (x, 0), 1) == const0_rtx))
                {
                  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
                  *cost += (COSTS_N_INSNS (1)
                            + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
                                        0, speed_p)
                            + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
                                        1, speed_p));
                  if (speed_p)
                    *cost += extra_cost->alu.arith;
                }
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.arith;
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          /* Slightly disparage, as we might need an extend operation.  */
          *cost += 1;
          if (speed_p)
            *cost += extra_cost->alu.arith;
          return false;
        }

      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
        {
          rtx shift_op;
          rtx shift_reg = NULL;

          shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

          if (shift_op)
            {
              if (shift_reg != NULL)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;
              *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          return false;
        }
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          return false;
        }

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
        if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
          {
            *cost += COSTS_N_INSNS (3);
            return true;
          }
        int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
        int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

        *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
        /* Assume that if one arm of the if_then_else is a register,
           that it will be tied with the result and eliminate the
           conditional insn.  */
        if (REG_P (XEXP (x, 1)))
          *cost += op2cost;
        else if (REG_P (XEXP (x, 2)))
          *cost += op1cost;
        else
          {
            if (speed_p)
              {
                if (extra_cost->alu.non_exec_costs_exec)
                  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
                else
                  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
              }
            else
              *cost += op1cost + op2cost;
          }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
        *cost = 0;
      else
        {
          machine_mode op0mode;
          /* We'll mostly assume that the cost of a compare is the cost of the
             LHS.  However, there are some notable exceptions.  */

          /* Floating point compares are never done as side-effects.  */
          op0mode = GET_MODE (XEXP (x, 0));
          if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
              && (op0mode == SFmode || !TARGET_VFP_SINGLE))
            {
              if (speed_p)
                *cost += extra_cost->fp[op0mode != SFmode].compare;

              if (XEXP (x, 1) == CONST0_RTX (op0mode))
                {
                  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
                  return true;
                }

              return false;
            }
          else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
            {
              *cost = LIBCALL_COST (2);
              return false;
            }

          /* DImode compares normally take two insns.  */
          if (op0mode == DImode)
            {
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;
              return false;
            }

          if (op0mode == SImode)
            {
              rtx shift_op;
              rtx shift_reg;

              if (XEXP (x, 1) == const0_rtx
                  && !(REG_P (XEXP (x, 0))
                       || (GET_CODE (XEXP (x, 0)) == SUBREG
                           && REG_P (SUBREG_REG (XEXP (x, 0))))))
                {
                  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

                  /* Multiply operations that set the flags are often
                     significantly more expensive.  */
                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;

                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == PLUS
                      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
                                                            0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;
                  return true;
                }

              shift_reg = NULL;
              shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
              if (shift_op != NULL)
                {
                  if (shift_reg != NULL)
                    {
                      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
                                         1, speed_p);
                      if (speed_p)
                        *cost += extra_cost->alu.arith_shift_reg;
                    }
                  else if (speed_p)
                    *cost += extra_cost->alu.arith_shift;
                  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
                  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
                  return true;
                }

              if (speed_p)
                *cost += extra_cost->alu.arith;
              if (CONST_INT_P (XEXP (x, 1))
                  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
                {
                  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
                  return true;
                }
              return false;
            }

          /* Vector mode?  */

          *cost = LIBCALL_COST (2);
          return false;
        }
      return true;
    case EQ:
    case GE:
    case GT:
    case LE:
    case LT:
      /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
         vcle and vclt). */
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && (XEXP (x, 1) == CONST0_RTX (mode)))
        {
          *cost = 0;
          return true;
        }

      /* Fall through.  */
    case NE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
        {
          /* Is it a store-flag operation?  */
          if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
              && XEXP (x, 1) == const0_rtx)
            {
              /* Thumb also needs an IT insn.  */
              *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
              return true;
            }
          if (XEXP (x, 1) == const0_rtx)
            {
              switch (code)
                {
                case LT:
                  /* LSR Rd, Rn, #31.  */
                  if (speed_p)
                    *cost += extra_cost->alu.shift;
                  break;

                case EQ:
                  /* RSBS T1, Rn, #0
                     ADC  Rd, Rn, T1.  */

                case NE:
                  /* SUBS T1, Rn, #1
                     SBC  Rd, Rn, T1.  */
                  *cost += COSTS_N_INSNS (1);
                  break;

                case LE:
                  /* RSBS T1, Rn, Rn, LSR #31
                     ADC  Rd, Rn, T1.  */
                  *cost += COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift;
                  break;

                case GT:
                  /* RSB  Rd, Rn, Rn, ASR #1
                     LSR  Rd, Rd, #31.  */
                  *cost += COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += (extra_cost->alu.arith_shift
                              + extra_cost->alu.shift);
                  break;

                case GE:
                  /* ASR  Rd, Rn, #31
                     ADD  Rd, Rn, #1.  */
                  *cost += COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += extra_cost->alu.shift;
                  break;

                default:
                  /* Remaining cases are either meaningless or would take
                     three insns anyway.  */
                  *cost = COSTS_N_INSNS (3);
                  break;
                }
              *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              return true;
            }
          else
            {
              *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
              if (CONST_INT_P (XEXP (x, 1))
                  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
                {
                  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
                  return true;
                }

              return false;
            }
        }
      /* Not directly inside a set.  If it involves the condition code
         register it must be the condition for a branch, cond_exec or
         I_T_E operation.  Since the comparison is performed elsewhere
         this is just the control part which has no additional
         cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
               && XEXP (x, 1) == const0_rtx)
        {
          *cost = 0;
          return true;
        }
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;

          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
          return false;
        }

      if (mode == SImode)
        {
          if (speed_p)
            *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
          return false;
        }
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
          && MEM_P (XEXP (x, 0)))
        {
          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);

          if (!speed_p)
            return true;

          if (GET_MODE (XEXP (x, 0)) == SImode)
            *cost += extra_cost->ldst.load;
          else
            *cost += extra_cost->ldst.load_sign_extend;

          if (mode == DImode)
            *cost += extra_cost->alu.shift;

          return true;
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have SXTB/SXTH.  */
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extend;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  */
          *cost += COSTS_N_INSNS (1);
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.shift;
        }

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
           || GET_MODE (XEXP (x, 0)) == SImode
           || GET_MODE (XEXP (x, 0)) == QImode)
          && MEM_P (XEXP (x, 0)))
        {
          *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

          return true;
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
        {
          /* UXTB can be a shorter instruction in Thumb2, but it might
             be slower than the AND Rd, Rn, #255 alternative.  When
             optimizing for speed it should never be slower to use
             AND, and we don't really model 16-bit vs 32-bit insns
             here.  */
          if (speed_p)
            *cost += extra_cost->alu.logical;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have UXTB/UXTH.  */
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extend;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  It's marginally preferable to use
             shifts rather than two BIC instructions as the second
             shift may merge with a subsequent insn as a shifter
             op.  */
          *cost = COSTS_N_INSNS (2);
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

      return true;
    case CONST_INT:
      {
        /* CONST_INT has no mode, so we cannot tell for sure how many
           insns are really going to be needed.  The best we can do is
           look at the value passed.  If it fits in SImode, then assume
           that's the mode it will be used for.  Otherwise assume it
           will be used in DImode.  */
        if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
          mode = SImode;
        else
          mode = DImode;

        /* Avoid blowing up in arm_gen_constant ().  */
        if (!(outer_code == PLUS
              || outer_code == AND
              || outer_code == IOR
              || outer_code == XOR
              || outer_code == MINUS))
          outer_code = SET;

      const_int_cost:
        if (mode == SImode)
          {
            *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
                                                      INTVAL (x), NULL, NULL,
                                                      0, 0));
            /* Extra costs?  */
          }
        else
          {
            *cost += COSTS_N_INSNS (arm_gen_constant
                                    (outer_code, SImode, NULL,
                                     trunc_int_for_mode (INTVAL (x), SImode),
                                     NULL, NULL, 0, 0)
                                    + arm_gen_constant (outer_code, SImode, NULL,
                                                        INTVAL (x) >> 32, NULL,
                                                        NULL, 0, 0));
            /* Extra costs?  */
          }
      }
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
        {
          if (arm_arch_thumb2 && !flag_pic)
            *cost += COSTS_N_INSNS (1);
          else
            *cost += extra_cost->ldst.load;
        }
      else
        *cost += COSTS_N_INSNS (1);

      if (flag_pic)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
        }

      return true;
    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (vfp3_const_double_rtx (x))
            {
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].fpconst;
              return true;
            }

          if (speed_p)
            {
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            *cost += COSTS_N_INSNS (1 + (mode == DFmode));

          return true;
        }
      *cost = COSTS_N_INSNS (4);
      return true;
    case CONST_VECTOR:
      /* Fixme.  */
      if (((TARGET_NEON && TARGET_HARD_FLOAT
            && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
           || TARGET_HAVE_MVE)
          && simd_immediate_valid_for_move (x, mode, NULL, NULL))
        *cost = COSTS_N_INSNS (1);
      else
        *cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
         MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
        *cost += 1;
      return true;

    case CLZ:
      if (speed_p)
        *cost += extra_cost->alu.clz;
      return false;
    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
        {
          if (speed_p)
            *cost += extra_cost->alu.log_shift;
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
          return true;
        }
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
                      == ZERO_EXTEND))))
        {
          if (speed_p)
            *cost += extra_cost->mult[1].extend;
          *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
                              ZERO_EXTEND, 0, speed_p)
                    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
                                ZERO_EXTEND, 0, speed_p));
          return true;
        }
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
         is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;
    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
          && mode == SImode
          && CONST_INT_P (XEXP (x, 1))
          && CONST_INT_P (XEXP (x, 2)))
        {
          if (speed_p)
            *cost += extra_cost->alu.bfx;
          *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
          return true;
        }
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
        *cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
        {
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].widen;
          if (!TARGET_VFP5
              && GET_MODE (XEXP (x, 0)) == HFmode)
            {
              /* Pre v8, widening HF->DF is a two-step process, first
                 widening to SFmode.  */
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[0].widen;
            }
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          return true;
        }

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
        {
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].narrow;
          *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
          return true;
          /* Vector modes?  */
        }
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          rtx op2 = XEXP (x, 2);

          /* vfms or vfnma.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          /* vfnms or vfnma.  */
          if (GET_CODE (op2) == NEG)
            op2 = XEXP (op2, 0);

          *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
          *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
          *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fma;

          return true;
        }

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
        {
          /* The *combine_vcvtf2i reduces a vmul+vcvt into
             a vcvt fixed-point conversion.  */
          if (code == FIX && mode == SImode
              && GET_CODE (XEXP (x, 0)) == FIX
              && GET_MODE (XEXP (x, 0)) == SFmode
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
                 > 0)
            {
              if (speed_p)
                *cost += extra_cost->fp[0].toint;

              *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
                                 code, 0, speed_p);
              return true;
            }

          if (GET_MODE_CLASS (mode) == MODE_INT)
            {
              mode = GET_MODE (XEXP (x, 0));
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].toint;
              /* Strip off the 'cost' of rounding towards zero.  */
              if (GET_CODE (XEXP (x, 0)) == FIX)
                *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
                                   0, speed_p);
              else
                *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
              /* ??? Increase the cost to deal with transferring from
                 FP -> CORE registers?  */
              return true;
            }
          else if (GET_MODE_CLASS (mode) == MODE_FLOAT
                   && TARGET_VFP5)
            {
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].roundint;
              return false;
            }
          /* Vector costs?  */
        }
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
        {
          /* ??? Increase the cost to deal with transferring from CORE
             -> FP registers?  */
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fromint;
          return false;
        }
      *cost = LIBCALL_COST (1);
      return false;
    case ASM_OPERANDS:
      {
        /* Just a guess.  Guess number of instructions in the asm
           plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
           though (see PR60663).  */
        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

        *cost = COSTS_N_INSNS (asm_length + num_operands);
        return true;
      }
    default:
      if (mode != VOIDmode)
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
               int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
                                   (enum rtx_code) outer_code,
                                   current_tune->insn_extra_cost,
                                   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
    }

  return result;
}
static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  /* Don't cost a simple reg-reg move at a full insn cost: such moves
     will likely disappear during register allocation.  */
  if (!reload_completed
      && GET_CODE (PATTERN (insn)) == SET
      && REG_P (SET_DEST (PATTERN (insn)))
      && REG_P (SET_SRC (PATTERN (insn))))
    return 2;
  cost = pattern_cost (PATTERN (insn), speed);
  /* If the cost is zero, then it's likely a complex insn.  We don't want the
     cost of these to be less than something we know about.  */
  return cost ? cost : COSTS_N_INSNS (2);
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}
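/* Illustrative summary (added commentary, not from the original source):
   the weights above rank auto-increment addresses cheapest (0), then
   reg+const sums (2), sums involving arithmetic sub-expressions (3),
   other sums (4), plain registers (6), and memory/label/symbol
   addresses most expensive (10).  */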
static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}
static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
                          int *cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
              || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
              || attr_type == TYPE_ALUS_SHIFT_IMM
              || attr_type == TYPE_LOGIC_SHIFT_IMM
              || attr_type == TYPE_LOGICS_SHIFT_IMM
              || attr_type == TYPE_ALU_SHIFT_REG
              || attr_type == TYPE_ALUS_SHIFT_REG
              || attr_type == TYPE_LOGIC_SHIFT_REG
              || attr_type == TYPE_LOGICS_SHIFT_REG
              || attr_type == TYPE_MOV_SHIFT
              || attr_type == TYPE_MVN_SHIFT
              || attr_type == TYPE_MOV_SHIFT_REG
              || attr_type == TYPE_MVN_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints (dep);
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
12283 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12293 case REG_DEP_OUTPUT
:
12294 if (recog_memoized (insn
) >= 0
12295 && recog_memoized (dep
) >= 0)
12297 if (GET_CODE (PATTERN (insn
)) == SET
)
12300 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
12302 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
12304 enum attr_type attr_type_insn
= get_attr_type (insn
);
12305 enum attr_type attr_type_dep
= get_attr_type (dep
);
12307 /* By default all dependencies of the form
12310 have an extra latency of 1 cycle because
12311 of the input and output dependency in this
12312 case. However this gets modeled as an true
12313 dependency and hence all these checks. */
12314 if (REG_P (SET_DEST (PATTERN (insn
)))
12315 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
12317 /* FMACS is a special case where the dependent
12318 instruction can be issued 3 cycles before
12319 the normal latency in case of an output
12321 if ((attr_type_insn
== TYPE_FMACS
12322 || attr_type_insn
== TYPE_FMACD
)
12323 && (attr_type_dep
== TYPE_FMACS
12324 || attr_type_dep
== TYPE_FMACD
))
12326 if (dep_type
== REG_DEP_OUTPUT
)
12327 *cost
= insn_default_latency (dep
) - 3;
12329 *cost
= insn_default_latency (dep
);
12334 if (dep_type
== REG_DEP_OUTPUT
)
12335 *cost
= insn_default_latency (dep
) + 1;
12337 *cost
= insn_default_latency (dep
);
12347 gcc_unreachable ();
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
                           int *cost)
{
  /* For FA726TE, true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 0;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
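/* Worked example (illustrative, not from the original source): on Thumb-1
   (!TARGET_32BIT), a 4-byte SImode value costs 2 * 4 * 1 = 8 when it can
   live in LO_REGS, but 2 * 4 * 2 = 16 for any other class, biasing reload
   toward the low registers that most Thumb-1 load/store encodings can
   address.  */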
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
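/* Worked example (illustrative): for a V4SI vec_construct,
   TYPE_VECTOR_SUBPARTS is 4, so the cost reported to the vectorizer is
   4 / 2 + 1 = 3, approximating the insert/transfer sequence needed to
   build the vector from scalars.  */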
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;

    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;

    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock,
	     *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
}
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((SYMBOL_REF_P (src_mem)
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).  */

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.cc), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
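/* Illustrative sketch (not part of the build): decoding the ABCDEFGH
   immediate back into its value, following the formula above.  The helper
   name `vfp3_decode_fp8' is hypothetical, purely for exposition.  For
   example, 0x70 decodes as sign 0, r = 7 ^ 3 = 4, n = 0 + 16 = 16,
   i.e. 16 * 2^-4 = 1.0.  */
#if 0
static double
vfp3_decode_fp8 (unsigned char imm8)
{
  int sign = (imm8 >> 7) & 1;		/* A: sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;	/* BCD: exponent, stored as r XOR 3.  */
  int n = (imm8 & 15) + 16;		/* EFGH: mantissa, stored as n - 16.  */
  double v = (double) n / (double) (1 << r);	/* n * 2^-r.  */
  return sign ? -v : v;
}
#endif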
/* Recognize immediates which can be used in various Neon and MVE instructions.
   Legal immediates are described by the following table (for VMVN variants,
   the bitwise inverse of the constant shown is recognized.  In either case,
   VMOV is output and the correct instruction to use for a given constant is
   chosen by the assembler).  The constant shown is replicated across all
   elements of the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
static int
simd_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Only support 128-bit vectors for MVE.  */
  if (TARGET_HAVE_MVE
      && (!vector
	  || VALID_MVE_PRED_MODE (mode)
	  || n_elts * innersize != 16))
    return -1;

  if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
    return -1;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
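/* Worked example (illustrative): a V4SI constant with every element equal
   to 255 splats to the byte pattern ff 00 00 00 (little-endian) in each
   4-byte group, so the first CHECK above matches: immtype 0, elsize 32,
   corresponding to a "vmov.i32 ..., #255"-style immediate.  */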
/* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
   implicitly, VMVN) immediate.  Write back width per element to
   *ELEMENTWIDTH (or zero for float elements), and a modified constant
   (whatever should be output for a VMOV) in *MODCONST.  This was formerly
   named "neon_immediate_valid_for_move"; it is now
   "simd_immediate_valid_for_move" because it is used by both Neon and
   MVE.  */
int
simd_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See simd_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shift,
   because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width,
					     isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
/* Return a non-NULL RTX iff VALS is a vector constant that can be
   loaded into a register using VDUP.

   If this is the case, and GENERATE is set, we also generate
   instructions to do this and return an RTX to assign to the register.  */

static rtx
neon_vdup_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  if (!generate)
    return x;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}
/* Return a HI representation of CONST_VEC suitable for MVE predicates.  */
rtx
mve_bool_vec_to_const (rtx const_vec)
{
  machine_mode mode = GET_MODE (const_vec);

  if (!VECTOR_MODE_P (mode))
    return const_vec;

  unsigned n_elts = GET_MODE_NUNITS (mode);
  unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
  unsigned shift_c = 16 / n_elts;
  unsigned i;
  int hi_val = 0;

  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (const_vec, i);
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el) & ((1U << el_prec) - 1);

      unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;

      hi_val |= elpart << (index * shift_c);
    }
  /* We are using mov immediate to encode this constant which writes 32-bits
     so we need to make sure the top 16-bits are all 0, otherwise we can't
     guarantee we can actually write this immediate.  */
  return gen_int_mode (hi_val, SImode);
}
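/* Worked example (illustrative, little-endian, assuming one predicate bit
   per lane for V16BI): a V16BI constant with lanes {1,0,1,0,...} has
   shift_c = 16 / 16 = 1 and maps each lane to one bit, giving the SImode
   constant 0x5555; an all-true V16BI gives 0xFFFF, which still fits the
   16 predicate bits a MOV immediate must encode.  */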
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
   into a register.

   If this is the case, and GENERATE is set, we also generate code to do
   this and return an RTX to copy into the register.  */

rtx
neon_make_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
    return mve_bool_vec_to_const (const_vec);
  else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return arm_disable_literal_pool ? NULL_RTX : const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
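/* Worked example (illustrative): for {x, 1, 2, 3} with one variable
   element, the code above first initializes the vector from the constant
   copy {1, 1, 2, 3} (the neighbouring value substituted at index 0) and
   then overwrites lane 0 with x via gen_vec_set_internal, avoiding a trip
   through the stack.  */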
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   an error if it doesn't.  EXP indicates the source location, which includes
   the inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error_at (EXPR_LOCATION (exp),
		  "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}
/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB level is 2 if full writeback address modes are allowed, 1
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed and 0 if no writeback at all is supported.  */

static int
arm_coproc_mem_operand_wb (rtx op, int wb_level)
{
  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need full writeback to accept
     PRE_INC and POST_DEC, and at least restricted writeback for
     POST_INC and PRE_DEC.  */
  if (wb_level > 0
      && (GET_CODE (ind) == POST_INC
	  || GET_CODE (ind) == PRE_DEC
	  || (wb_level > 1
	      && (GET_CODE (ind) == PRE_INC
		  || GET_CODE (ind) == POST_DEC))))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb_level > 1
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).

     The encoded immediate for 16-bit modes is multiplied by 2,
     while the encoded immediate for 32-bit and 64-bit modes is
     multiplied by 4.  */
  int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
      && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int arm_coproc_mem_operand (rtx op, bool wb)
{
  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
}

/* Return TRUE if OP is a valid coprocessor memory address pattern in a
   context in which no writeback address modes are allowed.  */

int
arm_coproc_mem_operand_no_writeback (rtx op)
{
  return arm_coproc_mem_operand_wb (op, 0);
}
/* In non-STRICT mode, return the register number; in STRICT mode return
   the hard regno or the replacement if it won't be a mem.  Otherwise, return
   the original pseudo number.  */
static int
arm_effective_regno (rtx op, bool strict)
{
  gcc_assert (REG_P (op));
  if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
      || !reg_renumber || reg_renumber[REGNO (op)] < 0)
    return REGNO (op);
  return reg_renumber[REGNO (op)];
}
/* This function returns TRUE on matching mode and op.
   1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
   2. For other modes, check for [Rn], return TRUE for Rn < R15
      (except R13).  */
bool
mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
{
  enum rtx_code code;
  int val, reg_no;

  /* Match: (mem (reg)).  */
  if (REG_P (op))
    {
      reg_no = arm_effective_regno (op, strict);
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : reg_no < LAST_ARM_REGNUM)
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  code = GET_CODE (op);

  if ((code == POST_INC
       || code == PRE_INC
       || code == PRE_DEC
       || code == POST_DEC)
      && REG_P (XEXP (op, 0)))
    {
      reg_no = arm_effective_regno (XEXP (op, 0), strict);
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  else if (((code == POST_MODIFY || code == PRE_MODIFY)
	    && GET_CODE (XEXP (op, 1)) == PLUS
	    && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
	    && REG_P (XEXP (op, 0))
	    && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
	   /* Make sure to only accept PLUS after reload_completed, otherwise
	      this will interfere with auto_inc's pattern detection.  */
	   || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
	       && GET_CODE (XEXP (op, 1)) == CONST_INT))
    {
      reg_no = arm_effective_regno (XEXP (op, 0), strict);
      if (code == PLUS)
	val = INTVAL (XEXP (op, 1));
      else
	val = INTVAL (XEXP (XEXP (op, 1), 1));

      switch (mode)
	{
	case E_V16QImode:
	case E_V8QImode:
	case E_V4QImode:
	  if (abs (val) > 127)
	    return false;
	  break;
	case E_V8HImode:
	case E_V8HFmode:
	case E_V4HImode:
	case E_V4HFmode:
	  if (val % 2 != 0 || abs (val) > 254)
	    return false;
	  break;
	case E_V4SImode:
	case E_V4SFmode:
	  if (val % 4 != 0 || abs (val) > 508)
	    return false;
	  break;
	default:
	  return false;
	}
      return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
	      || (MVE_STN_LDW_MODE (mode)
		  ? reg_no <= LAST_LO_REGNUM
		  : (reg_no < LAST_ARM_REGNUM
		     && (code == PLUS || reg_no != SP_REGNUM))));
    }
  return false;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1))
      && REG_P (XEXP (ind, 0))
      && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
   type.  */
int
mve_struct_mem_operand (rtx op)
{
  rtx ind = XEXP (op, 0);

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow only post-increment by the mode size.  */
  if (GET_CODE (ind) == POST_INC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given
   D0 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid
   as D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */

void
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || VIRTUAL_REGISTER_P (x));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

static reg_class_t
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (SYMBOL_REF_P (x))
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (LABEL_REF_P (x))
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

static bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
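/* Worked examples (illustrative): a clamp to [0, 255] has hi_bound 255
   (log = 8) and lo_bound 0, so it matches USAT with *mask = 8; a clamp to
   [-128, 127] has hi_bound 127 (log = 7) and lo_bound -128 = -127 - 1, so
   it matches SSAT with *mask = log + 1 = 8.  */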
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
/* Return true if OP is a valid load or store multiple operation.  LOAD is
   true for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the
   register bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
   or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
   following form:

   [(set (reg:SI <N>) (const_int 0))
    (set (reg:SI <M>) (const_int 0))
    ...
    (unspec_volatile [(const_int 0)]
		     VUNSPEC_CLRM_APSR)
    (clobber (reg:CC CC_REGNUM))
   ]

   Any number (including 0) of set expressions is valid, the volatile unspec
   is optional.  All registers but SP and PC are allowed and registers must
   be in strict increasing order.

   To be a valid VSCCLRM pattern, OP must have the following form:

   [(unspec_volatile [(const_int 0)]
		     VUNSPEC_VSCCLRM_VPR)
    (set (reg:SF <N>) (const_int 0))
    (set (reg:SF <M>) (const_int 0))
    ...
   ]

   As with CLRM, any number (including 0) of set expressions is valid, however
   the volatile unspec is mandatory here.  Any VFP single-precision register
   is accepted but all registers must be consecutive and in increasing
   order.  */

bool
clear_operation_p (rtx op, bool vfp)
{
  unsigned regno;
  unsigned last_regno = INVALID_REGNUM;
  rtx elt, reg, zero;
  int count = XVECLEN (op, 0);
  int first_set = vfp ? 1 : 0;
  machine_mode expected_mode = vfp ? E_SFmode : E_SImode;

  for (int i = first_set; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);

      if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
	{
	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
	      || XVECLEN (elt, 0) != 1
	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
	      || i != count - 2)
	    return false;

	  continue;
	}

      if (GET_CODE (elt) == CLOBBER)
	continue;

      if (GET_CODE (elt) != SET)
	return false;

      reg = SET_DEST (elt);
      zero = SET_SRC (elt);

      if (!REG_P (reg)
	  || GET_MODE (reg) != expected_mode
	  || zero != CONST0_RTX (SImode))
	return false;

      regno = REGNO (reg);

      if (vfp)
	{
	  if (i != first_set && regno != last_regno + 1)
	    return false;
	}
      else
	{
	  if (regno == SP_REGNUM || regno == PC_REGNUM)
	    return false;
	  if (i != first_set && regno <= last_regno)
	    return false;
	}

      last_regno = regno;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	 ldr	rd1, [rbase + offset]
	 ldr	rd2, [rbase + offset + 4]

     to

	 add	rd1, rbase, offset
	 ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
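/* For example (illustrative): on a core with arm_ld_sched set,
   multiple_operation_profitable_p (false, 2, 8) is false because the
   two-register combination would need a preparatory add, whereas
   multiple_operation_profitable_p (false, 4, 0) succeeds since neither
   early-out above applies to a four-register combination.  */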
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
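/* A minimal usage sketch (hypothetical values): with offsets {8, 0, 4}
   the caller seeds ORDER[0] with the index of the lowest offset:

     HOST_WIDE_INT offs[3] = {8, 0, 4};
     int ord[3] = {1, 0, 0};
     bool ok = compute_offset_order (3, offs, ord, NULL);

   On return ok is true and ord is {1, 2, 0}, i.e. the offsets are
   visited as 0, 4, 8.  Passing {8, 0, 12} instead would fail because
   no offset equals 0 + 4.  */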
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
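/* Worked example (illustrative): four loads from [rb], [rb, #4], [rb, #8]
   and [rb, #12] give unsorted_offsets {0, 4, 8, 12} and ldm_case 1 (ldmia).
   On ARM, offsets {4, 8, 12, 16} select ldm_case 2 (ldmib) and offsets
   {-12, -8, -4, 0} ldm_case 3 (ldmda); a block starting at some other
   small offset falls back to ldm_case 5, which makes the caller compute
   the base address into a register first.  */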
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
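/* Worked example (illustrative): stores to [rb, #-12], [rb, #-8],
   [rb, #-4] and [rb] give stm_case 3 (stmda, ARM only), while stores to
   [rb, #-16] through [rb, #-4] give stm_case 4 (stmdb) on any 32-bit
   variant.  */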
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}

/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
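/* The PARALLEL built by the two routines above for COUNT == 2 with a
   write-back of 8 has the following shape (illustrative, for the load
   case; the MEMs themselves come from the caller):

     (parallel
       [(set (reg:SI rb) (plus:SI (reg:SI rb) (const_int 8)))
	(set (reg:SI r4) (mem:SI (reg:SI rb)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI rb) (const_int 4))))])

   Without write-back the leading base-update SET is simply omitted.  */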
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
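/* A minimal usage sketch (hypothetical rtxes basereg/basemem): emit an
   ldmia of r4-r7 with write-back of the base register:

     int regs[4] = {4, 5, 6, 7};
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
				       basemem, &off));

   Afterwards off has been advanced to 16, since WRITE_BACK requests that
   the running MEM_OFFSET be returned to the caller.  */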
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
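/* For instance (illustrative), a matched pair

     ldr r0, [r3]
     ldr r1, [r3, #4]

   is rewritten as

     ldmia r3, {r0, r1}

   and with SORT_REGS a commutatively-used pair loaded in the "wrong"
   order can additionally have its destination registers swapped so the
   register list is ascending.  */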
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
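/* For instance (illustrative), a matched pair

     str r0, [r3]
     str r1, [r3, #4]

   becomes

     stmia r3, {r0, r1}

   (or stmia r3!, {r0, r1} on Thumb-1, where the base register must be
   dead after the instruction).  */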
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
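/* For instance (illustrative), the four-insn sequence

     mov r0, #10
     str r0, [r3]
     mov r0, #20
     str r0, [r3, #4]

   can be rewritten, given a free register such as r1, as

     mov r0, #10
     mov r1, #20
     stmia r3, {r0, r1}

   which is why duplicate source registers are renamed above before the
   constants are loaded.  */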
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
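/* Worked example (illustrative): LENGTH == 11 with INTERLEAVE_FACTOR == 1
   copies two 4-byte blocks in the main loop, then one halfword and one
   byte, ending with remaining == 0 and srcoffset == dstoffset == 11 as
   the assertion above requires.  */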
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}

/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
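/* For example (illustrative): LENGTH == 37 with BYTES_PER_ITER == 16
   emits a loop that runs twice (copying 32 bytes) followed by a
   straight-line copy of the 5 leftover bytes.  */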
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_cpymemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 redundant.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_cpymemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_cpymemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
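/* For example (illustrative): given an SImode MEM at [r0, #8],
   next_consecutive_mem returns an SImode MEM at [r0, #12], with the
   recorded MEM_OFFSET advanced by 4 to match.  */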
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_cpymem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_cpymemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx first_reg = NULL_RTX;
      rtx second_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  if (BYTES_BIG_ENDIAN)
	    {
	      second_reg = gen_lowpart (SImode, reg0);
	      first_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	  else
	    {
	      first_reg = gen_lowpart (SImode, reg0);
	      second_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	}
      if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
	emit_move_insn (reg0, src);
      else if (src_aligned)
	emit_insn (gen_unaligned_loaddi (reg0, src));
      else
	{
	  emit_insn (gen_unaligned_loadsi (first_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (second_reg, src));
	}

      if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
	emit_move_insn (dst, reg0);
      else if (dst_aligned)
	emit_insn (gen_unaligned_storedi (dst, reg0));
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, first_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, second_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Decompose operands for a 64-bit binary operation in OP1 and OP2
   into its component 32-bit subregs.  OP2 may be an immediate
   constant and we want to simplify it in that case.  */
void
arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
			rtx *lo_op2, rtx *hi_op2)
{
  *lo_op1 = gen_lowpart (SImode, op1);
  *hi_op1 = gen_highpart (SImode, op1);
  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_lowpart_offset (SImode, DImode));
  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_highpart_offset (SImode, DImode));
}
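/* A minimal usage sketch (hypothetical operands): a DImode expander can
   split its inputs with

     rtx lo1, hi1, lo2, hi2;
     arm_decompose_di_binop (operands[1], operands[2],
			     &lo1, &hi1, &lo2, &hi2);

   after which lo2/hi2 are already simplified CONST_INTs whenever
   operands[2] was an immediate.  */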
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (SUBREG_P (y)))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* A widened compare of the sum of a value plus a carry against a
     constant.  This is a representation of RSC.  We want to swap the
     result of the comparison at output.  Not valid if the Z bit is
     needed.  */
  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && arm_borrow_operation (XEXP (x, 1), DImode)
      && CONST_INT_P (y)
      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	   && (op == LE || op == GT))
	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      && (op == LEU || op == GTU))))
    return CC_SWPmode;

  /* If X is a constant we want to use CC_RSBmode.  This is
     non-canonical, but arm_gen_compare_reg uses this to generate the
     correct canonical form.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || SUBREG_P (y))
      && CONST_INT_P (x))
    return CC_RSBmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (SUBREG_P (y)))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NZmode;

  /* A comparison of ~reg with a const is really a special
     canoncialization of compare (~const, reg), which is a reverse
     subtract operation.  We may not get here if CONST is 0, but that
     doesn't matter because ~0 isn't a valid immediate for RSB.  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == NOT
      && CONST_INT_P (y))
    return CC_RSBmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
      && CONST_INT_P (y)
      && UINTVAL (y) == 0x800000000
      && (op == GEU || op == LTU))
    return CC_ADCmode;

  if (GET_MODE (x) == DImode
      && (op == GE || op == LT)
      && GET_CODE (x) == SIGN_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_NVmode;

  if (GET_MODE (x) == DImode
      && (op == GEU || op == LTU)
      && GET_CODE (x) == ZERO_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_Bmode;

  if (GET_MODE (x) == DImode
      && (op == EQ || op == NE)
      && (GET_CODE (x) == PLUS
	  || GET_CODE (x) == MINUS)
      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
      && GET_CODE (y) == SIGN_EXTEND
      && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
    return CC_Vmode;

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
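/* For example (illustrative): comparing (ashift:SI (reg) (const_int 2))
   against a register selects CC_SWPmode through the shifted-operand case
   above, and an EQ test of (and:SI (cmp1) (cmp2)) is handed off to
   arm_select_dominance_cc_mode with DOM_CC_X_AND_Y.  */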
/* X and Y are two (DImode) things to compare for the condition CODE.  Emit
   the sequence of instructions needed to generate a suitable condition
   code register.  Return the CC register result.  */
static rtx
arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;

  /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
  gcc_assert (TARGET_32BIT);
  gcc_assert (!CONST_INT_P (x));

  rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
				  subreg_highpart_offset (SImode, DImode));
  rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
				  subreg_highpart_offset (SImode, DImode));
  switch (code)
    {
    case EQ:
    case NE:
      {
	if (y_lo == const0_rtx || y_hi == const0_rtx)
	  {
	    if (y_lo != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		gcc_assert (y_hi == const0_rtx);
		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
		if (!arm_add_operand (y_lo, SImode))
		  y_lo = force_reg (SImode, y_lo);
		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
		x_lo = scratch2;
	      }
	    else if (y_hi != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
		if (!arm_add_operand (y_hi, SImode))
		  y_hi = force_reg (SImode, y_hi);
		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
		x_hi = scratch2;
	      }

	    if (!scratch)
	      {
		gcc_assert (!reload_completed);
		scratch = gen_rtx_SCRATCH (SImode);
	      }

	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

	    rtx set
	      = gen_rtx_SET (cc_reg,
			     gen_rtx_COMPARE (CC_NZmode,
					      gen_rtx_IOR (SImode, x_lo, x_hi),
					      const0_rtx));
	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
							      clobber)));
	    return cc_reg;
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	if (!arm_add_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
	cc_reg = gen_rtx_REG (mode, CC_REGNUM);

	emit_insn (gen_rtx_SET (cc_reg,
				gen_rtx_COMPARE (mode, conjunction,
						 const0_rtx)));
	return cc_reg;
      }

    case LT:
    case GE:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);

	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
							   cmp1));
	else if (CONST_INT_P (y_hi))
	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
							     y_hi, cmp1));
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
							 cmp1));
	return SET_DEST (single_set (insn));
      }

    case LE:
    case GT:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
								 x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    case LTU:
    case GEU:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);
	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
							  cmp1));
	else if (CONST_INT_P (y_hi))
	  {
	    /* Constant is viewed as unsigned when zero-extended.  */
	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
							      y_hi, cmp1));
	  }
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
							cmp1));
	return SET_DEST (single_set (insn));
      }

    case LEU:
    case GTU:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
								x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    default:
      gcc_unreachable ();
    }
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    return arm_gen_dicompare_reg (code, x, y, scratch);

  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
  if (mode == CC_RSBmode)
    {
      if (!scratch)
	scratch = gen_rtx_SCRATCH (SImode);
      emit_insn (gen_rsb_imm_compare_scratch (scratch,
					      GEN_INT (~UINTVAL (x)), y));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
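/* Worked example (editor's illustration): for offset = 4100 the split above
   gives lo = 4100 & 0xfff = 4 and hi = 4096, so hi + lo == offset; for
   offset = -4097 it gives lo = -1 and hi = -4096.  Only HI has to be folded
   into the base with addsi3, leaving LO within the ldrb immediate range.  */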
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
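/* For illustration (editor's sketch, not part of the original code): on a
   little-endian target the function above turns

	strh	r1, [base, #offset]

   into the equivalent of

	strb	r1, [base, #offset]
	lsr	scratch, r1, #8
	strb	scratch, [base, #offset+1]

   with the big-endian case storing the two bytes in the opposite order.  */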
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
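/* Illustration (editor's sketch): with TARGET_AAPCS_BASED and
   BYTES_BIG_ENDIAN, a 2-byte aggregate in r0 is padded upward -- the data
   sits at the lower-address end of the register, its most significant half
   on a big-endian view -- while a 2-byte scalar takes the default
   !BYTES_BIG_ENDIAN == false, i.e. pad downward, and occupies the least
   significant half.  */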
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;
  else if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
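/* For example (assuming the usual encodings: an 8-bit immediate for
   ARM-state LDRD/STRD and an 8-bit word-scaled immediate in Thumb-2):
   offset 252 is accepted in both states, offset 255 only in ARM state
   (Thumb-2 requires a multiple of 4), and offset 1024 in neither.  */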
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register, the
   offsets are constants within the range, and the gap between the offsets
   is 4.  If reload is complete then check that the registers are legal.
   WBACK indicates whether the address is updated.  LOAD indicates whether
   the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (SUBREG_P (mem))
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
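/* Accepted address forms are thus (reg rN) and (plus (reg rN)
   (const_int k)); e.g. a MEM whose address is (plus (reg r4)
   (const_int 8)) yields *base = r4 and *offset = 8, while
   pre/post-modify addresses are rejected by the side_effects_p
   test above.  */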
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   str r0, [r2]
	   str r1, [r2, #4]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
/* Return true if parallel execution of the two word-size accesses provided
   could be satisfied with a single LDRD/STRD instruction.  Two word-size
   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
   register operands and OPERANDS[2,3] are the corresponding memory operands.
   LOAD indicates whether the access is load or store.  */
bool
valid_operands_ldrd_strd (rtx *operands, bool load)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset;
  int i, gap;

  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	return false;
    }

  if (offsets[0] > offsets[1])
    return false;

  gap = offsets[1] - offsets[0];
  offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				false, load);
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      {
	char fpstr[20];
	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			 sizeof (fpstr), 0, 1);
	fprintf (f, "%s", fpstr);
      }
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix * next;
  rtx_insn * insn;
  HOST_WIDE_INT address;
  rtx * loc;
  machine_mode mode;
  int fix_size;
  rtx value;
  Mnode * minipool;
  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
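/* So, for example, MINIPOOL_FIX_SIZE (HImode) and MINIPOOL_FIX_SIZE (SImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */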
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
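/* Worked example (editor's note): a TBB dispatch (QImode entries) with five
   cases gives size = 1 * 5 = 5, rounded up to 6 bytes by the halfword
   alignment above; an ADDR_VEC of four SImode entries on Thumb gives
   4 * 4 + 2 = 18 bytes.  */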
/* Emit insns to load the function address from FUNCDESC (an FDPIC
   function descriptor) into a register and the GOT address into the
   FDPIC register, returning an rtx for the register holding the
   function address.  */
static rtx
arm_load_function_descriptor (rtx funcdesc)
{
  rtx fnaddr_reg = gen_reg_rtx (Pmode);
  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

  emit_move_insn (fnaddr_reg, fnaddr);

  /* The ABI requires the entry point address to be loaded first, but
     since we cannot support lazy binding for lack of atomic load of
     two 32-bits values, we do not need to bother to prevent the
     previous load from being moved after that of the GOT address.  */
  emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));

  return fnaddr_reg;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label).levels[0].log;
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
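/* E.g. with a label aligned to 8 bytes in Thumb state (min_insn_size == 2)
   the worst case is 8 - 2 = 6 bytes of padding, since an instruction can
   end as little as min_insn_size bytes into the alignment window.  */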
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   it.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
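/* E.g. for the DImode constant 0x000000ff000000ff each half is the valid
   immediate 0xff, so both arm_gen_constant calls return 1 and the total
   inline cost is 2 insns.  */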
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
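/* Usage sketch (editor's note): arm_const_double_by_immediates returns true
   for 0x000000ff000000ff (both halves are the immediate 0xff) but false for
   0x0000000100000101, whose low part 0x101 is not a valid ARM immediate.  */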
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 fields size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
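/* Worked example (editor's illustration): for
     struct { char a; short b; };
   passed in r0, field A uses bits 0..7 and B bits 16..31, so the code above
   records mask 0x0000ff00 in padding_bits_to_clear[0]: the inter-field
   padding must be cleared before a non-secure transition while the field
   bits themselves are left alone.  */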
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and pass this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)
{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT mask
		= HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
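/* Illustrative sketch (an assumption, not taken from the original code):
   for a 'double' argument passed in core registers, arg_rtx is (reg:DF r0),
   ARM_NUM_REGS yields 2, and the returned mask has bits 0 and 1 set so that
   neither r0 nor r1 is clobbered by the clearing code.  */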
/* Clear secret information from registers before doing a cmse_nonsecure_call
   or returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP
   indicates which registers are to be fully cleared, using the value in
   register CLEARING_REG if more efficient.  The PADDING_BITS_LEN entries
   array PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in
   caller-saved core registers, with SCRATCH_REG used as a scratch register
   for that clearing.

   NOTE: one of the three following conditions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
     in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */

static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
  bool saved_clearing = false;
  rtx saved_clearing_reg = NULL_RTX;
  int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;

  gcc_assert (arm_arch_cmse);

  if (!bitmap_empty_p (to_clear_bitmap))
    {
      minregno = bitmap_first_set_bit (to_clear_bitmap);
      maxregno = bitmap_last_set_bit (to_clear_bitmap);
    }
  clearing_regno = REGNO (clearing_reg);

  /* Clear padding bits.  */
  gcc_assert (padding_bits_len <= NUM_ARG_REGS);
  for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
    {
      uint32_t mask;
      rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);

      if (padding_bits_to_clear[i] == 0)
	continue;

      /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
	 CLEARING_REG as scratch.  */
      if (TARGET_THUMB1
	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
	{
	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
	     such that we can use clearing_reg to clear the unused bits in the
	     arguments.  */
	  if ((clearing_regno > maxregno
	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
	      && !saved_clearing)
	    {
	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
	      emit_move_insn (scratch_reg, clearing_reg);
	      saved_clearing = true;
	      saved_clearing_reg = scratch_reg;
	    }
	  scratch_reg = clearing_reg;
	}

      /* Fill the lower half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) & 0xFFFF;
      emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));

      /* Fill the top half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) >> 16;
      rtx16 = gen_int_mode (16, SImode);
      dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
      if (mask)
	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));

      emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
    }
  if (saved_clearing)
    emit_move_insn (clearing_reg, saved_clearing_reg);

  /* Clear full registers.  */

  if (TARGET_HAVE_FPCXT_CMSE)
    {
      rtvec vunspec_vec;
      int i, j, k, nb_regs;
      rtx use_seq, par, reg, set, vunspec;
      int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
      auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
      auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);

      for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
	{
	  /* Find next register to clear and exit if none.  */
	  for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
	  if (i > maxregno)
	    break;

	  /* Compute number of consecutive registers to clear.  */
	  for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
	       j++);
	  nb_regs = j - i;

	  /* Create VSCCLRM RTX pattern.  */
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
	  vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
	  vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
					     VUNSPEC_VSCCLRM_VPR);
	  XVECEXP (par, 0, 0) = vunspec;

	  /* Insert VFP register clearing RTX in the pattern.  */
	  start_sequence ();
	  for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
	    {
	      if (!bitmap_bit_p (to_clear_bitmap, j))
		continue;

	      reg = gen_rtx_REG (SFmode, j);
	      set = gen_rtx_SET (reg, const0_rtx);
	      XVECEXP (par, 0, k++) = set;
	      emit_use (reg);
	    }
	  use_seq = get_insns ();
	  end_sequence ();

	  emit_insn_after (use_seq, emit_insn (par));
	}

      /* Get set of core registers to clear.  */
      bitmap_clear (core_regs_bitmap);
      bitmap_set_range (core_regs_bitmap, R0_REGNUM,
			IP_REGNUM - R0_REGNUM + 1);
      bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
		  core_regs_bitmap);
      gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));

      if (bitmap_empty_p (to_clear_core_bitmap))
	return;

      /* Create clrm RTX pattern.  */
      nb_regs = bitmap_count_bits (to_clear_core_bitmap);
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));

      /* Insert core register clearing RTX in the pattern.  */
      start_sequence ();
      for (j = 0, i = minregno; j < nb_regs; i++)
	{
	  if (!bitmap_bit_p (to_clear_core_bitmap, i))
	    continue;

	  reg = gen_rtx_REG (SImode, i);
	  set = gen_rtx_SET (reg, const0_rtx);
	  XVECEXP (par, 0, j++) = set;
	  emit_use (reg);
	}

      /* Insert APSR register clearing RTX in the pattern
       * along with clobbering CC.  */
      vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
      vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
					 VUNSPEC_CLRM_APSR);
      XVECEXP (par, 0, j++) = vunspec;

      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
      XVECEXP (par, 0, j) = clobber;

      use_seq = get_insns ();
      end_sequence ();

      emit_insn_after (use_seq, emit_insn (par));
    }
  else
    {
      /* If not marked for clearing, clearing_reg already does not contain
	 any secret.  */
      if (clearing_regno <= maxregno
	  && bitmap_bit_p (to_clear_bitmap, clearing_regno))
	{
	  emit_move_insn (clearing_reg, const0_rtx);
	  emit_use (clearing_reg);
	  bitmap_clear_bit (to_clear_bitmap, clearing_regno);
	}

      for (regno = minregno; regno <= maxregno; regno++)
	{
	  if (!bitmap_bit_p (to_clear_bitmap, regno))
	    continue;

	  if (IS_VFP_REGNUM (regno))
	    {
	      /* If regno is an even vfp register and its successor is also to
		 be cleared, use vmov.  */
	      if (TARGET_VFP_DOUBLE
		  && VFP_REGNO_OK_FOR_DOUBLE (regno)
		  && bitmap_bit_p (to_clear_bitmap, regno + 1))
		{
		  emit_move_insn (gen_rtx_REG (DFmode, regno),
				  CONST1_RTX (DFmode));
		  emit_use (gen_rtx_REG (DFmode, regno));
		  regno++;
		}
	      else
		{
		  emit_move_insn (gen_rtx_REG (SFmode, regno),
				  CONST1_RTX (SFmode));
		  emit_use (gen_rtx_REG (SFmode, regno));
		}
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
	      emit_use (gen_rtx_REG (SImode, regno));
	    }
	}
    }
}
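/* Illustration (an assumption about typical output, not taken from the
   original sources): with FPCXT-CMSE available, clearing r1-r3 collapses
   into a single CLRM such as

       clrm    {r1, r2, r3, APSR}

   whereas older targets fall back to copying CLEARING_REG, whose value is
   already known not to be secret, into each register to be cleared.  */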
/* Clear core and caller-saved VFP registers not used to pass arguments before
   a cmse_nonsecure_call.  Saving, clearing and restoring of VFP callee-saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.  See
   libgcc/config/arm/cmse_nonsecure_call.S.  */

static void
cmse_nonsecure_call_inline_register_clear (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
	  /* frame = VFP regs + FPSCR + VPR.  */
	  unsigned lazy_store_stack_frame_size
	    = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
	  unsigned long callee_saved_mask
	    = ((1 << (LAST_HI_REGNUM + 1)) - 1)
	      & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
	  unsigned address_regnum, regno;
	  unsigned max_int_regno
	    = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
	  unsigned max_fp_regno
	    = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
	  unsigned maxregno
	    = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
	  auto_sbitmap to_clear_bitmap (maxregno + 1);
	  rtx_insn *seq;
	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Mark registers that need to be cleared.  Those that hold a
	     parameter are removed from the set further below.  */
	  bitmap_clear (to_clear_bitmap);
	  bitmap_set_range (to_clear_bitmap, R0_REGNUM,
			    max_int_regno - R0_REGNUM + 1);

	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (!lazy_fpclear)
	    {
	      auto_sbitmap float_bitmap (maxregno + 1);

	      bitmap_clear (float_bitmap);
	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
				max_fp_regno - FIRST_VFP_REGNUM + 1);
	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  address_regnum = REGNO (XEXP (address, 0));
	  if (address_regnum <= max_int_regno)
	    bitmap_clear_bit (to_clear_bitmap, address_regnum);

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      uint64_t to_clear_args_mask;

	      if (VOID_TYPE_P (arg_type))
		continue;

	      function_arg_info arg (arg_type, /*named=*/true);
	      if (!first_param)
		/* ??? We should advance after processing the argument and pass
		   the argument we're advancing past.  */
		arm_function_arg_advance (args_so_far, arg);

	      arg_rtx = arm_function_arg (args_so_far, arg);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_args_mask
		= compute_not_to_clear_mask (arg_type, arg_rtx,
					     REGNO (arg_rtx),
					     &padding_bits_to_clear[0]);
	      if (to_clear_args_mask)
		{
		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
		    {
		      if (to_clear_args_mask & (1ULL << regno))
			bitmap_clear_bit (to_clear_bitmap, regno);
		    }
		}

	      first_param = false;
	    }

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  clearing_reg = XEXP (address, 0);
	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));

	  if (clear_callee_saved)
	    {
	      rtx_insn *push_insn =
		emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
	      /* Disable frame debug info in push because it needs to be
		 disabled for pop (see below).  */
	      RTX_FRAME_RELATED_P (push_insn) = 0;

	      /* Lazy store multiple.  */
	      if (lazy_fpclear)
		{
		  rtx imm;
		  rtx_insn *add_insn;

		  imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
		  add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
						    stack_pointer_rtx, imm));
		  /* If we have the frame pointer, then it will be the
		     CFA reg.  Otherwise, the stack pointer is the CFA
		     reg, so we need to emit a CFA adjust.  */
		  if (!frame_pointer_needed)
		    arm_add_cfa_adjust_cfa_note (add_insn,
						 - lazy_store_stack_frame_size,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
		}
	      /* Save VFP callee-saved registers.  */
	      else
		{
		  vfp_emit_fstmd (D7_VFP_REGNUM + 1,
				  (max_fp_regno - D7_VFP_REGNUM) / 2);
		  /* Disable frame debug info in push because it needs to be
		     disabled for vpop (see below).  */
		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
		}
	    }

	  /* Clear caller-saved registers that leak before doing a non-secure
	     call.  */
	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
				NUM_ARG_REGS, ip_reg, clearing_reg);

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);

	  if (TARGET_HAVE_FPCXT_CMSE)
	    {
	      rtx_insn *last, *pop_insn, *after = insn;

	      start_sequence ();

	      /* Lazy load multiple done as part of libcall in Armv8-M.  */
	      if (lazy_fpclear)
		{
		  rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
		  emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
		  rtx_insn *add_insn =
		    emit_insn (gen_addsi3 (stack_pointer_rtx,
					   stack_pointer_rtx, imm));
		  if (!frame_pointer_needed)
		    arm_add_cfa_adjust_cfa_note (add_insn,
						 lazy_store_stack_frame_size,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		}
	      /* Restore VFP callee-saved registers.  */
	      else
		{
		  int nb_callee_saved_vfp_regs =
		    (max_fp_regno - D7_VFP_REGNUM) / 2;
		  arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
					      nb_callee_saved_vfp_regs,
					      stack_pointer_rtx);
		  /* Disable frame debug info in vpop because the SP adjustment
		     is made using a CFA adjustment note while CFA used is
		     sometimes R7.  This then causes an assert failure in the
		     CFI note creation code.  */
		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
		}

	      arm_emit_multi_reg_pop (callee_saved_mask);
	      pop_insn = get_last_insn ();

	      /* Disable frame debug info in pop because they reset the state
		 of popped registers to what it was at the beginning of the
		 function, before the prologue.  This leads to incorrect state
		 when doing the pop after the nonsecure call for registers that
		 are pushed both in prologue and before the nonsecure call.

		 It also occasionally triggers an assert failure in CFI note
		 creation code when there are two codepaths to the epilogue,
		 one of which does not go through the nonsecure call.
		 Obviously this means that debugging between the push and pop
		 is not reliable.  */
	      RTX_FRAME_RELATED_P (pop_insn) = 0;

	      seq = get_insns ();
	      last = get_last_insn ();
	      end_sequence ();

	      emit_insn_after (seq, after);

	      /* Skip the pop we have just inserted after the nonsecure call;
		 we know it does not contain a nonsecure call.  */
	      insn = last;
	    }
	}
    }
}
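/* For illustration: for a nonsecure call through r4 on Thumb-1, the
   LSB-clearing emitted above produces

       lsrs    r4, r4, #1
       lsls    r4, r4, #1

   rather than a BIC-based sequence, which would need one more register.  */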
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
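/* Example (illustrative): given Thumb-1 RTL of the shape

       movs    r2, r3
       ...
       cmp     r2, #0
       bne     .L1

   the move is rewritten as "subs r2, r3, #0" so the later compare against
   zero can reuse the condition flags and be removed by later passes.  */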
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
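/* Illustrative example (an assumption about typical output): with the CC
   register dead at that point, a plain

       add     r0, r1, #5      @ 32-bit Thumb-2 encoding

   is rewritten into its flag-setting form "adds r0, r1, #5", which has a
   16-bit encoding.  */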
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_inline_register_clear ();

  /* We cannot run the Thumb passes for thunks because there is no CFG.  */
  if (cfun->is_thunk)
    ;
  else if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it should
     no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && SYMBOL_REF_P (addr)
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register (NULL_RTX, false /*compute_now*/);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_FDPIC)
    {
      rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
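/* Illustration (assumption about typical output): for a constant such as
   0x12345678, arm_emit_movpair expands to the familiar movw/movt pair:

       movw    r0, #0x5678
       movt    r0, #0x1234
   */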
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);
      const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (can_ldrd
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (can_ldrd);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (can_ldrd);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Auto-increment addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      gcc_assert (can_ldrd);
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (can_ldrd
		      && (TARGET_THUMB2
			  || !CONST_INT_P (otherops[2])
			  || (INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (emit)
			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (can_ldrd
		  && (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256)))
		{
		  if (emit)
		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditional execution.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%1, %M0", operands);
	    }
	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (can_ldrd
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (can_ldrd)
		return "ldrd%?\t%0, [%1]";

	      return "ldmia%?\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      /* For TARGET_ARM the first source register of an STRD
	 must be even.  This is usually the case for double-word
	 values but user assembly constraints can force an odd
	 starting register.  */
      bool allow_strd = TARGET_LDRD
			&& !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%?\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than strd can handle,
	     fix these up with a pair of str.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
		  return "";
		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
		  return "";
		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (allow_strd
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldmia%?\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%?\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
	      || mode == SFmode
	      || mode == DFmode
	      || mode == HImode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : sp ? "32" : "16",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers.

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case PLUS:
      if (GET_CODE (addr) == PLUS)
	addr = XEXP (addr, 0);
      /* Fall through.  */
    case LABEL_REF:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
		  sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
		else
		  sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
	    else
	      sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
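/* Illustrative output (an assumption about a typical case): a quad-word
   load from a plain register address prefers the alignment-friendly
   "vld1.64 {d16, d17}, [r0]" form over "vldmia r0, {d16, d17}", for the
   reasons given in the comment above.  */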
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case E_EImode:
	case E_OImode:
	  return 8;
	case E_CImode:
	  return 12;
	case E_XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:   return ARM_LSL_NAME;
    case ASHIFTRT: return "asr";
    case LSHIFTRT: return "lsr";
    case ROTATERT: return "ror";
    default:       gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:  return "add";
    case MINUS: return shift_first_arg ? "rsb" : "sub";
    case IOR:   return "orr";
    case XOR:   return "eor";
    case AND:   return "and";
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));
    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
	*amountp = INTVAL (XEXP (op, 1));
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
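/* Illustrative note (not in the original source): a (mult x 8) operand
   reaches the MULT case above and is emitted as "lsl #3", since
   exact_log2 (8) == 3; an ASHIFT amount of 32 or more degrades to
   "lsr #32" exactly as the comment above describes.  */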
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.cc.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
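/* Example of the emitted output (illustrative): the string "a\"b" with a
   trailing NUL becomes
	.ascii	"a\"b\000"
   where the quote is backslash-escaped and the unprintable NUL is
   printed as an octal escape.  */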
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (reg_needs_saving_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
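/* For instance (illustrative): a FIQ handler only needs r0-r7 considered
   here because r8-r12 are banked, while a normal ISR must consider
   r0-r12; in both cases any register for which reg_needs_saving_p is
   true ends up in the returned mask.  */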
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	       || flag_stack_clash_protection)
	      && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  if (arm_current_function_pac_enabled_p ())
    save_reg_mask |= 1 << IP_REGNUM;

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes ())) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_VFP_BASE)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
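/* Example (illustrative): if only d8 and d9 need saving, COUNT is 2 and,
   on a pre-v6 core (the ARM10 VFPr1 erratum), an extra slot is reserved,
   so the function returns 24 bytes rather than 16.  */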
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && !IS_CMSE_ENTRY (func_type)
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5t && TARGET_ARM)
		sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		}
	    }
	  /* For interrupt returns we have to use an LDM rather than
	     a POP so that we can use the exception return variant.  */
	  else if (IS_INTERRUPT (func_type))
	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
	  else
	    sprintf (instr, "pop%s\t{", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  gcc_assert (arm_arch5t || arm_arch4t);
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  if (IS_CMSE_ENTRY (func_type))
	    {
	      /* For Armv8.1-M, this is cleared as part of the CLRM instruction
		 emitted by cmse_nonsecure_entry_clear_before_return () and the
		 VSTR/VLDR instructions in the prologue and epilogue.  */
	      if (!TARGET_HAVE_FPCXT_CMSE)
		{
		  /* Check if we have to clear the 'GE bits' which is only used if
		     parallel add and subtraction instructions are available.  */
		  if (TARGET_INT_SIMD)
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
		  else
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvq, %%|lr", conditional);

		  output_asm_insn (instr, & operand);
		  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
		     care of it.  */
		  if (TARGET_HARD_FLOAT)
		    {
		      /* Clear the cumulative exception-status bits (0-4,7) and
			 the condition code bits (28-31) of the FPSCR.  We need
			 to remember to clear the first scratch register used
			 (IP) and save and restore the second (r4).

			 Important note: the length of the
			 thumb2_cmse_entry_return insn pattern must account for
			 the size of the below instructions.  */
		      output_asm_insn ("push\t{%|r4}", & operand);
		      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
		      output_asm_insn ("movw\t%|r4, #65376", & operand);
		      output_asm_insn ("movt\t%|r4, #4095", & operand);
		      output_asm_insn ("and\t%|ip, %|r4", & operand);
		      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
		      output_asm_insn ("pop\t{%|r4}", & operand);
		      output_asm_insn ("mov\t%|ip, %|lr", & operand);
		    }
		}
	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
	    }
	  /* Use bx if it's available.  */
	  else if (arm_arch5t || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
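/* Typical results (illustrative): a simple ARM-mode function that saved
   r4, r5 and lr returns with "pop {r4, r5, pc}", while an interrupt
   handler restoring the same set uses "ldmfd sp!, {r4, r5, pc}^" so that
   SPSR is copied back into CPSR on the exception return.  */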
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
				    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
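/* Example (illustrative): for NAME "arm_poke_function_name", LENGTH is
   23 (including the NUL) and ALIGNLENGTH rounds up to 24, so the marker
   word emitted after the string is 0xff000018.  */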
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
	       (HOST_WIDE_INT) crtl->args.size,
	       crtl->args.pretend_args_size,
	       (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* This is the first pair: allocate the whole stack area with
	       the first store's writeback.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;
}
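/* Example sequence (illustrative): for SAVED_REGS_MASK covering
   {r4, r5, r6} (an odd count), the code above emits
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   so that the strd stays doubleword aligned when SP started out
   64-bit aligned.  */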
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more
   scheduling freedom and can be turned into an STM by peephole
   optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
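/* Example sequence (illustrative): for SAVED_REGS_MASK covering
   {r4, r5, r7}, the consecutive pair r4/r5 becomes an STRD with
   writeback and r7 falls back to a plain offset store:
	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]  */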
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
	     instead we'd have a parallel expression detailing all
	     the stores to the various memory addresses so that debug
	     information is more up-to-date.  Remember however while writing
	     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  /* NOTE: The dwarf code emitter handles reg-reg copies correctly
	     and in the following example the reg-reg copy of SP to the IP
	     register is handled through the .cfi_def_cfa_register directive,
	     and the .cfi_offset directive for the IP register is skipped by
	     the dwarf code emitter.
	     Example:
		 mov	ip, sp
		.cfi_def_cfa_register 12
		 push	{fp, ip, lr, pc}
		.cfi_offset 11, -16
		.cfi_offset 13, -12

	     Whereas the Arm-specific .save directive handling is different
	     from that of the dwarf code emitter and it doesn't consider
	     reg-reg copies while updating the register list.  When PACBTI is
	     enabled we manually update the .save directive register list to
	     use "ra_auth_code" (pseudo register 143) instead of the IP
	     register, as shown in the following pseudo code.
	     Example:
		.cfi_register 143, 12
		 push	{r3, r7, ip, lr}
		.save	{r3, r7, ra_auth_code, lr}  */
	  rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	  if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	    dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 dwarf_reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	  if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	    dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       dwarf_reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	  dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
					      NULL_RTX);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
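/* Example (illustrative): for SAVED_REGS_MASK covering {r4, r5, pc} the
   PARALLEL built above holds the return, the SP increment by 12 and the
   three register loads, and is emitted as a jump insn matching the
   pop_multiple_with_stack_update_and_return pattern, i.e. effectively
   "pop {r4, r5, pc}".  */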
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
					  plus_constant (Pmode,
							 stack_pointer_rtx,
							 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}
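/* Example sequence (illustrative): for SAVED_REGS_MASK covering
   {r4, r5, r6} the loop above pairs r4/r5 into one LDRD, the stack is
   then adjusted past them, and the odd r6 is loaded with post-increment:
	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4  */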
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
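/* Example sequence (illustrative): for SAVED_REGS_MASK covering
   {r4, r5, r6} in ARM mode the consecutive pair r4/r5 is loaded with one
   LDRD at offset 0, r6 with a single LDR at offset 8, and a single SP
   adjustment of 12 bytes follows:
	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12  */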
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */


/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (reg_needs_saving_p (regno))
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_VFP_BASE)
	saved += arm_get_vfp_saved_size ();

      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
	 nonsecure entry functions with VSTR/VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
	saved += 4;
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
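/* A worked example of the layout computed above, under assumed values (the
   numbers are illustrative only): a Thumb-2 function with 8 bytes of
   locals, no variadic arguments, no static chain, pushing {r4, r5, lr} and
   saving no VFP registers would get

       saved_args    = 0
       saved_regs    = 12		(three core registers)
       soft_frame    = 12, bumped to 16 for doubleword alignment
       locals_base   = 16 + 8 = 24
       outgoing_args = 24		(already doubleword aligned)

   assuming CALLER_INTERWORKING_SLOT_SIZE is 0 for the configuration.  */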
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */

	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
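/* Continuing the illustrative numbers from the frame layout example above:
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM would yield
   24 - (0 + 4) = 20, and eliminating FRAME_POINTER_REGNUM into
   STACK_POINTER_REGNUM would yield 24 - 16 = 8.  These are assumed values,
   not output computed from a real function.  */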
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  if (TARGET_REALLY_IWMMXT)
    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
      if (reg_needs_saving_p (reg))
	{
	  insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = gen_rtx_MEM (V2SImode, insn);
	  insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  saved_size += 8;
	}

  if (TARGET_VFP_BASE)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
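/* A sketch of the grouping done above, under assumed conditions: if exactly
   d8-d11 need saving, the scan finds one gap-free run and emits a single
   multi-register store (one vfp_emit_fstmd call with a count of 4),
   i.e. something like "vstmdb sp!, {d8-d11}"; a hole in the run would
   instead split the save into two smaller stores.  The register numbers
   here are purely illustrative.  */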
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
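/* An illustrative expansion of the middle case above, assuming
   PROBE_INTERVAL == 4096 and the hypothetical values FIRST == 4096,
   SIZE == 10000 (two full intervals plus a remainder of 1808):

	mov	ip, #8192		@ first + PROBE_INTERVAL
	sub	ip, sp, ip
	str	r0, [ip, #0]		@ probe at first + 4096
	sub	ip, ip, #4096
	str	r0, [ip, #0]		@ probe at first + 8192
	str	r0, [ip, #-1808]	@ probe at first + SIZE

   The scratch register and exact instruction forms depend on REGNO1 and on
   ARM vs Thumb-2 immediate ranges; this is a sketch, not literal output.  */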
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
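/* Schematically, the loop printed above assembles to the following (with
   PROBE_INTERVAL == 4096; r4 and r5 stand for whatever REG1 and REG2
   happen to be):

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/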
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which registers we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = (IS_NESTED (func_type)
		&& (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		     || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
			  || flag_stack_clash_protection)
			 && !df_regs_ever_live_p (LR_REGNUM)
			 && arm_r3_live_at_start_p ()))
		    || arm_current_function_pac_enabled_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  saved_regs += 4;
	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  if (arm_current_function_pac_enabled_p ())
	    cfun->machine->pacspval_needed = 1;
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	  if (arm_current_function_pac_enabled_p ())
	    cfun->machine->pacspval_needed = 1;
	}
    }

  if (arm_current_function_pac_enabled_p ())
    {
      /* If IP was clobbered we only emit a PAC instruction as the BTI
	 one will be added before the push of the clobbered IP (if
	 necessary) by the bti pass.  */
      if (aarch_bti_enabled () && !clobber_ip)
	insn = emit_insn (gen_pacbti_nop ());
      else
	insn = emit_insn (gen_pac_nop ());

      rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, dwarf);
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
  if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
    {
      saved_regs += 4;
      insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
						GEN_INT (FPCXTNS_ENUM)));
      rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx, -4));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;

	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    arm_emit_probe_stack_range (get_stack_check_protect (),
					size - get_stack_check_protect (),
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (get_stack_check_protect (), size,
				    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	emit_insn (gen_stack_tie (stack_pointer_rtx,
				  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask, NULL_RTX);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
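/* For reference, a minimal sketch of the code the function above typically
   produces for a small ARM-state function with a frame (assumed flags and
   register set; illustrative only):

	push	{r4, r5, fp, lr}	@ emit_multi_reg_push
	add	fp, sp, #12		@ create the new frame pointer
	sub	sp, sp, #16		@ allocate locals + outgoing args

   Interrupt functions, stack checking, PAC/BTI and IP saving each add
   further instructions as described in the code above.  */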
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
   Letters previously used, but now deprecated/obsolete: sWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
   If CODE is 'V', then the operand must be a CONST_INT representing
   the bits to preserve in the modified register (Rd) of a BFI or BFC
   instruction: print out both the width and lsb (shift) fields.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       vld1 and vst1.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 'V':
      {
	/* Output the LSB (shift) and width for a bitmask instruction
	   based on a literal mask.  The LSB is printed first,
	   followed by the width.

	   Eg. For 0b1...1110001, the result is #1, #3.  */
	if (!CONST_INT_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    break;
	  }

	unsigned HOST_WIDE_INT val
	  = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
	int lsb = exact_log2 (val & -val);
	asm_fprintf (stream, "#%d, #%d", lsb,
		     (exact_log2 (val + (val & -val)) - lsb));
      }
      return;

    case 's':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP( XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
       rtx_code, the memory operand's output looks like the following.
       1.  [Rn], #+/-<imm>
       2.  [Rn, #+/-<imm>]!
       3.  [Rn, #+/-<imm>]
       4.  [Rn].  */
    case 'E':
      {
	rtx addr;
	rtx postinc_reg = NULL;
	unsigned inc_val = 0;
	enum rtx_code code;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	code = GET_CODE (addr);
	if (code == POST_INC || code == POST_DEC || code == PRE_INC
	    || code == PRE_DEC)
	  {
	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
	    inc_val = GET_MODE_SIZE (GET_MODE (x));
	    if (code == POST_INC || code == POST_DEC)
	      asm_fprintf (stream, "], #%s%d",(code == POST_INC)
			   ? "": "-", inc_val);
	    else
	      asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
			   ? "": "-", inc_val);
	  }
	else if (code == POST_MODIFY || code == PRE_MODIFY)
	  {
	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
	    postinc_reg = XEXP (XEXP (addr, 1), 1);
	    if (postinc_reg && CONST_INT_P (postinc_reg))
	      {
		if (code == POST_MODIFY)
		  asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
		else
		  asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
	      }
	  }
	else if (code == PLUS)
	  {
	    rtx base = XEXP (addr, 0);
	    rtx index = XEXP (addr, 1);

	    gcc_assert (REG_P (base) && CONST_INT_P (index));

	    HOST_WIDE_INT offset = INTVAL (index);
	    asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
	  }
	else
	  {
	    gcc_assert (REG_P (addr));
	    asm_fprintf (stream, "[%r]",REGNO (addr));
	  }
      }
      return;

    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (CONST_DOUBLE_P (x));
      int result;
      result = vfp3_const_double_for_fract_bits (x);
      if (result == 0)
	result = vfp3_const_double_for_bits (x);
      fprintf (stream, "#%d", result);
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
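/* Example of these codes in use (a hypothetical output template, not one
   quoted from the machine description): given a template such as

       "add%?\t%0, %1, %2"

   the '?' punctuation prints the current condition ("ne", "eq", ...) when
   the instruction is conditionally executed, so the final assembly might
   read "addne r0, r1, r2".  Likewise "%Q0"/"%R0" applied to a DImode
   register pair select the low and high word registers respectively.  */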
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      {
		asm_fprintf (stream, "[%r, %s%r",
			     REGNO (base), is_minus ? "-" : "",
			     REGNO (XEXP (index, 0)));
		arm_print_operand (stream, index, 'S');
		fputs ("]", stream);
		break;
	      }

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	  else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
	    asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
	  else
	    asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  /* References to weak symbols cannot be resolved locally:
	     they may be overridden by a non-weak definition at link
	     time.  */
	  if (!arm_pic_data_is_text_relative
	      || (SYMBOL_REF_P (x)
		  && (!SYMBOL_REF_LOCAL_P (x)
		      || (SYMBOL_REF_DECL (x)
			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
		      || (SYMBOL_REF_FUNCTION_P (x)
			  && !arm_fdpic_local_funcdesc_p (x)))))
	    {
	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
		fputs ("(GOTFUNCDESC)", asm_out_file);
	      else
		fputs ("(GOT)", asm_out_file);
	    }
	  else
	    {
	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
		fputs ("(GOTOFFFUNCDESC)", asm_out_file);
	      else
		{
		  bool is_readonly;

		  if (!TARGET_FDPIC
		      || arm_is_segment_info_known (x, &is_readonly))
		    fputs ("(GOTOFF)", asm_out_file);
		  else
		    fputs ("(GOT)", asm_out_file);
		}
	    }
	}

      /* For FDPIC we also have to mark symbol for .data section.  */
      if (TARGET_FDPIC
	  && !making_const_table
	  && SYMBOL_REF_P (x)
	  && SYMBOL_REF_FUNCTION_P (x))
	fputs ("(FUNCDESC)", asm_out_file);

      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_real
	      (*CONST_DOUBLE_REAL_VALUE (elt),
	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
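/* Illustrative output (hypothetical symbol name): assembling a word-sized
   reference to an external symbol in a PIC constant pool would print

	.word	foo(GOT)

   while a locally-resolvable symbol would get the (GOTOFF) suffix
   instead.  */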
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
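/* Illustrative output for a constructor registered with priority 101 under
   AAPCS (hypothetical symbol name; the exact section flags come from
   get_section):

	.section	.init_array.00101
	.align	2
	.word	my_ctor(target1)
*/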
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
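/* A sketch of the transformation the fsm enables (illustrative input and
   output, not taken from a real compilation):

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		===>	movne	r1, #1
	mov	r1, #1
   .L1:

   The conditional branch is deleted and the instruction it would have
   skipped is executed conditionally instead.  */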
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case E_CC_NZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_NVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	default: return ARM_NV;
	}

    case E_CC_Bmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case E_CC_ADCmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CCmode:
    case E_CC_RSBmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.cc assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5t)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.cc assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
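/* Worked example of the encoding above (added for exposition): with
   arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b011, bits 0..2 yield 't', 't', 'e', so the
   prefix printed is "itte\teq", i.e. the next three instructions execute
   if-EQ, if-EQ, if-NE respectively.  Bit 0 is always set, since the first
   instruction of a block necessarily has the block's base condition.  */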
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (IS_VPR_REGNUM (regno))
    return CEIL (GET_MODE_SIZE (mode), 2);

  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
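/* Illustrative examples: ARM_NUM_REGS rounds the mode size up to whole
   32-bit words, so SImode needs one core register and DImode two, while
   the early return above reports a single register for the special
   registers numbered above the PC (e.g. CC_REGNUM) regardless of mode.  */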
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_VFP_BASE
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (IS_VPR_REGNUM (regno))
    return VALID_MVE_PRED_MODE (mode);

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
    {
      if (mode == DFmode || mode == DImode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode || mode == BFmode || mode == HImode
	  || mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
      if (TARGET_HAVE_MVE)
	return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
		|| (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  The same restriction applies for MVE
     in order to support Armv8.1-M Mainline instructions.
     Do not allow very large Neon structure opaque modes in general
     registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
	return true;

      return !((TARGET_LDRD || TARGET_CDE)
	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  */
static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  if (TARGET_HAVE_MVE
      && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if ((TARGET_NEON
       && (VALID_NEON_DREG_MODE (mode1)
	   || VALID_NEON_QREG_MODE (mode1)
	   || VALID_NEON_STRUCT_MODE (mode1))
       && (VALID_NEON_DREG_MODE (mode2)
	   || VALID_NEON_QREG_MODE (mode2)
	   || VALID_NEON_STRUCT_MODE (mode2)))
      || (TARGET_HAVE_MVE
	  && (VALID_MVE_MODE (mode1)
	      || VALID_MVE_STRUCT_MODE (mode1))
	  && (VALID_MVE_MODE (mode2)
	      || VALID_MVE_STRUCT_MODE (mode2))))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (IS_VPR_REGNUM (regno))
    return VPR_REG;

  if (IS_PAC_REGNUM (regno))
    return PAC_REG;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
      case EXCESS_PRECISION_TYPE_FLOAT16:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
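/* For example (illustrative): copying a four-register structure from
   d1..d4 into d2..d5 overlaps with REGNO (dest) > REGNO (src), so the
   component moves are emitted in reverse order (d5 <- d4 first), ensuring
   no source register is clobbered before it has been read.  */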
/* Split operands into moves from op[1] + op[2] into op[0].  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
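/* E.g. number_of_first_bit_set (0x18) == 3, since bit 3 is the lowest
   bit set in 0b11000 (illustrative).  */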
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
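/* Sketch of the RTL built above for mask == 0x90, i.e. r4 and r7
   (illustrative only):

     (parallel [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) -8)))
		     (unspec:BLK [(reg:SI 4)] UNSPEC_PUSH_MULT))
		(use (reg:SI 7))])

   with a REG_FRAME_RELATED_EXPR note describing the SP adjustment and,
   when REAL_REGS is nonzero, the individual register stores for the
   unwinder.  */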
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
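/* For example (illustrative), thumb_pop (f, 0x90) prints

	pop	{r4, r7}

   while a mask consisting only of the PC is handled by thumb_exit
   instead, so the return can go through BX where required.  */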
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
	  if (!TARGET_HAVE_FPCXT_CMSE)
	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
			 reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register for the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in cmse_nonsecure_entry_clear_before_return ().  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
	 by cmse_nonsecure_entry_clear_before_return ().  */
      if (!TARGET_HAVE_FPCXT_CMSE)
	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
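/* Illustrative example (added for exposition): for a void function with
   the return address on the stack (reg_containing_return_addr == -1),
   interworking enabled, no backtrace structure and no EH return, the
   matching loop above leaves r0 as the single popping register, so the
   emitted sequence is simply

	pop	{r0}
	bx	r0
*/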
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NZmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error("Unexpected thumb1 far jump");
}

int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}

/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't much useful.

     Following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll-back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
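/* Worked example of the heuristic above (illustrative): when a branch
   with the far_jump attribute was seen and func_size == 700 bytes, the
   worst-case layout interleaves a 4-byte literal-pool entry with every
   2-byte insn, stretching the function to roughly 700 * 3 == 2100 bytes,
   beyond the 2048-byte conditional-branch range, so far_jump_used is
   conservatively set.  */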
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
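/* Worked example (illustrative): with amount == 512 and two usable
   registers free (n_free == 2), the function returns (512 - 508) / 4 == 1,
   so one extra register is pushed and SP then only needs a 508-byte
   adjustment, which still fits the immediate range of a single 16-bit
   SUB SP instruction.  */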
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		       next_hi_reg--)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;
  machine->pacspval_needed = 0;
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */
bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
27098 /* Generate the function's prologue. */
27101 thumb1_expand_prologue (void)
27105 HOST_WIDE_INT amount
;
27106 HOST_WIDE_INT size
;
27107 arm_stack_offsets
*offsets
;
27108 unsigned long func_type
;
27110 unsigned long live_regs_mask
;
27111 unsigned long l_mask
;
27112 unsigned high_regs_pushed
= 0;
27113 bool lr_needs_saving
;
27115 func_type
= arm_current_func_type ();
27117 /* Naked functions don't have prologues. */
27118 if (IS_NAKED (func_type
))
27120 if (flag_stack_usage_info
)
27121 current_function_static_stack_size
= 0;
27125 if (IS_INTERRUPT (func_type
))
27127 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27131 if (is_called_in_ARM_mode (current_function_decl
))
27132 emit_insn (gen_prologue_thumb1_interwork ());
27134 offsets
= arm_get_frame_offsets ();
27135 live_regs_mask
= offsets
->saved_regs_mask
;
27136 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
27138 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27139 l_mask
= live_regs_mask
& 0x40ff;
27140 /* Then count how many other high registers will need to be pushed. */
27141 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27143 if (crtl
->args
.pretend_args_size
)
27145 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27147 if (cfun
->machine
->uses_anonymous_args
)
27149 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27150 unsigned long mask
;
27152 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27153 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27155 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27159 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27160 stack_pointer_rtx
, x
));
27162 RTX_FRAME_RELATED_P (insn
) = 1;
  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

         0   sub  SP, #16          Reserve space for 4 registers.
         2   push {R7}             Push low registers.
         4   add  R7, SP, #20      Get the stack pointer before the push.
         6   str  R7, [SP, #8]     Store the stack pointer
                                   (before reserving the space).
         8   mov  R7, PC           Get hold of the start of this code + 12.
         10  str  R7, [SP, #16]    Store it.
         12  mov  R7, FP           Get hold of the current frame pointer.
         14  str  R7, [SP, #4]     Store it.
         16  mov  R7, LR           Get hold of the current return address.
         18  str  R7, [SP, #12]    Store it.
         20  add  R7, SP, #16      Point at the start of the
                                   backtrace structure.
         22  mov  FP, R7           Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
        {
          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
          lr_needs_saving = false;

          offset = bit_count (l_mask) * UNITS_PER_WORD;
        }

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
         prevent the scheduler from doing anything weird.  Failing that
         we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask == 0)
        {
          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }
      else
        {
          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
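      /* Sketch of the resulting layout (derived from the stores above):
         the new FP ends up at SP + offset + 12 and the backtrace
         structure reads, from low to high address,
            [FP, #-12]  saved frame pointer
            [FP, #-8]   stack pointer before this frame was built
            [FP, #-4]   return address (LR)
            [FP, #0]    pointer into this creation code (PC + 12).  */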
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
                                                 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      /* Here we need to mask out registers used for passing arguments
         even if they can be pushed.  This is to avoid using them to
         stash the high registers.  Such kind of stash may clobber the
         use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
         saved; but if the function examines its own return address then
         the value is still live and we need to avoid using it.  */
      bool return_addr_live
        = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                           LR_REGNUM);

      if (lr_needs_saving || return_addr_live)
        pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
        {
          unsigned long real_regs_mask = 0;
          unsigned long push_mask = 0;

          for (regno = LR_REGNUM; regno >= 0; regno--)
            if (pushable_regs & (1 << regno))
              {
                emit_move_insn (gen_rtx_REG (SImode, regno),
                                gen_rtx_REG (SImode, next_hi_reg));

                high_regs_pushed--;
                real_regs_mask |= (1 << next_hi_reg);
                push_mask |= (1 << regno);

                if (high_regs_pushed)
                  {
                    for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                         next_hi_reg--)
                      if (live_regs_mask & (1 << next_hi_reg))
                        break;
                  }
                else
                  break;
              }

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (lr_needs_saving)
            {
              push_mask |= 1 << LR_REGNUM;
              real_regs_mask |= 1 << LR_REGNUM;
              lr_needs_saving = false;
              /* If the return address is not live at this point, we
                 can add LR to the list of registers that we can use
                 for pushes.  */
              if (!return_addr_live)
                pushable_regs |= 1 << LR_REGNUM;
            }

          insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
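  /* Illustrative sketch (hypothetical register assignment): if r8 and r9
     are live high registers and the pushable set is {r4, r5}, the loop
     above emits

        mov     r5, r9
        mov     r4, r8
        push    {r4, r5}

     while real_regs_mask records {r8, r9}, so the unwind notes describe
     the high registers rather than the scratch copies.  */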
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask, NULL_RTX);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("%<-fstack-check=specific%> for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          rtx reg, dwarf;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function).  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))
              break;

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));

          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -amount));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
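  /* For illustration (a hypothetical AMOUNT of 1280 with r4 live, so r4
     was pushed above and may now be corrupted), the large-decrement path
     emits roughly

        ldr     r4, .LCn        @ .LCn: .word -1280
        add     sp, sp, r4

     and the REG_FRAME_RELATED_EXPR note describes it to the unwinder as
     the single adjustment sp = sp - 1280.  */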
  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
  int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (clear_vfpregs)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      if (!TARGET_HAVE_FPCXT_CMSE)
        {
          /* Make sure we don't clear the two scratch registers used to clear
             the relevant FPSCR bits in output_return_instruction.  */
          emit_use (gen_rtx_REG (SImode, IP_REGNUM));
          bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
          emit_use (gen_rtx_REG (SImode, 4));
          bitmap_clear_bit (to_clear_bitmap, 4);
        }
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
         the AAPCS, since these should never be made callee-saved by user
         options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
        continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
        continue;
      if (!callee_saved_reg_p (regno)
          && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
              || TARGET_HARD_FLOAT))
        bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
         support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
        = compute_not_to_clear_mask (result_type, result_rtl, 0,
                                     &padding_bits_to_clear);
      if (to_clear_return_mask)
        {
          gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
          for (regno = R0_REGNUM; regno <= maxregno; regno++)
            if (to_clear_return_mask & (1ULL << regno))
              bitmap_clear_bit (to_clear_bitmap, regno);
        }
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
         returning a composite type, which only uses r0.  Let's make sure that
         r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
                        clearing_reg);
}
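/* A worked example (hypothetical): for a cmse_nonsecure_entry function
   returning an int in r0 under -mfloat-abi=soft, the bitmap built above
   covers r1-r3 and ip, so everything the non-secure caller could observe
   is scrubbed while the live return value in r0 survives.  */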
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
         functions or adapt code to handle according to ACLE.  This path should
         not be reachable for cmse_nonsecure_entry functions though we prefer
         to assert it for now to ensure that future code changes do not silently
         change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (arm_current_function_pac_enabled_p ())
        {
          gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
          arm_emit_multi_reg_pop (saved_regs_mask);
          emit_insn (gen_aut_nop ());
          emit_jump_insn (simple_return_rtx);
        }
      else if (num_regs == 1)
        {
          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
          rtx addr = gen_rtx_MEM (SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
          emit_jump_insn (par);
        }
      else
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          arm_emit_multi_reg_pop (saved_regs_mask);
        }
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
        cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
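/* Sketch of the two return shapes generated above (illustrative
   assembly): the single-register case becomes a post-incremented load
   straight into the program counter,

        ldr     pc, [sp], #4

   while the multi-register case folds LR into PC so that a single
   "pop {..., pc}" both restores the saved registers and returns.  */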
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (reg_needs_saving_p (regno))
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
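/* Illustrative sketch: with AMOUNT == 520, which does not fit the
   immediate field of a Thumb-1 "add sp" instruction, the path above
   builds the constant in the always-free r3 first:

        movs    r3, #...        @ materialize 520 in r3
        add     sp, sp, r3

   instead of splitting the adjustment across several immediate adds.  */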
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
        num_regs++;
        floats_from_frame += 4;
      }

  if (TARGET_VFP_BASE)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
        {
          rtx_insn *insn;
          floats_from_frame += saved_size;
          insn = emit_insn (gen_addsi3 (ip_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (-floats_from_frame)));
          arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
                                       ip_rtx, hard_frame_pointer_rtx);
        }

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
        /* Look for a case where a reg does not need restoring.  */
        if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
          {
            if (start_reg != i)
              arm_emit_vfp_multi_reg_pop (start_reg,
                                          (i - start_reg) / 2,
                                          gen_rtx_REG (SImode,
                                                       IP_REGNUM));
            start_reg = i + 2;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (start_reg != i)
        arm_emit_vfp_multi_reg_pop (start_reg,
                                    (i - start_reg) / 2,
                                    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
         it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
        if (reg_needs_saving_p (i))
          {
            rtx addr = gen_frame_mem (V2SImode,
                                      plus_constant (Pmode,
                                                     hard_frame_pointer_rtx,
                                                     - lrm_count * 4));
            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                               gen_rtx_REG (V2SImode, i),
                                               NULL_RTX);
            lrm_count += 2;
          }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    hard_frame_pointer_rtx,
                                    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
                                   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
         IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
                              gen_rtx_POST_INC (SImode,
                                                stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                         gen_rtx_REG (SImode, IP_REGNUM),
                                         NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
        emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
        {
          /* In ARM mode, frame pointer points to first saved register.
             Restore stack pointer to last saved register.  */
          amount = offsets->frame - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (insn, amount,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);

          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
      else
        {
          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
          amount = offsets->locals_base - offsets->saved_regs;
          if (amount)
            {
              insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                            hard_frame_pointer_rtx,
                                            GEN_INT (amount)));
              arm_add_cfa_adjust_cfa_note (insn, amount,
                                           hard_frame_pointer_rtx,
                                           hard_frame_pointer_rtx);
            }

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_movsi (stack_pointer_rtx,
                                       hard_frame_pointer_rtx));
          arm_add_cfa_adjust_cfa_note (insn, 0,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
         last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
        {
          rtx_insn *tmp;
          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx,
                                       GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (tmp, amount,
                                       stack_pointer_rtx, stack_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }

  if (TARGET_VFP_BASE)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
         any groupings made in the prologue and generate matching
         vldm operations.  The need to match groups is because,
         unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
        /* Look for a case where a reg does not need restoring.  */
        if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
          {
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
            if (end_reg > i + 2)
              arm_emit_vfp_multi_reg_pop (i + 2,
                                          (end_reg - (i + 2)) / 2,
                                          stack_pointer_rtx);
            end_reg = i;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (end_reg > i + 2)
        arm_emit_vfp_multi_reg_pop (i + 2,
                                    (end_reg - (i + 2)) / 2,
                                    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (reg_needs_saving_p (i))
        {
          rtx_insn *insn;
          rtx addr = gen_rtx_MEM (V2SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                             gen_rtx_REG (V2SImode, i),
                                             NULL_RTX);
          arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_CMSE_ENTRY (func_type)
          && !IS_STACKALIGN (func_type)
          && really_return
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return
          && !arm_current_function_pac_enabled_p ())
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          return_in_pc = true;
        }

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
        {
          for (i = 0; i <= LAST_ARM_REGNUM; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx addr = gen_rtx_MEM (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx));
                set_mem_alias_set (addr, get_frame_alias_set ());

                if (i == PC_REGNUM)
                  {
                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
                    XVECEXP (insn, 0, 0) = ret_rtx;
                    XVECEXP (insn, 0, 1)
                      = gen_rtx_SET (gen_rtx_REG (SImode, i), addr);
                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
                    insn = emit_jump_insn (insn);
                  }
                else
                  {
                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                                                 addr));
                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                       gen_rtx_REG (SImode, i),
                                                       NULL_RTX);
                    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                                 stack_pointer_rtx,
                                                 stack_pointer_rtx);
                  }
              }
        }
      else
        {
          if (TARGET_LDRD
              && current_tune->prefer_ldrd_strd
              && !optimize_function_for_size_p (cfun))
            {
              if (TARGET_THUMB2)
                thumb2_emit_ldrd_pop (saved_regs_mask);
              else if (TARGET_ARM && !IS_INTERRUPT (func_type))
                arm_emit_ldrd_pop (saved_regs_mask);
              else
                arm_emit_multi_reg_pop (saved_regs_mask);
            }
          else
            arm_emit_multi_reg_pop (saved_regs_mask);
        }

      if (return_in_pc)
        return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
        = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                 stack_pointer_rtx,
                                 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
        {
          /* Restore pretend args.  Refer arm_expand_prologue on how to save
             pretend_args in stack.  */
          int num_regs = crtl->args.pretend_args_size / 4;
          saved_regs_mask = (0xf0 >> num_regs) & 0xf;
          for (j = 0, i = 0; j < num_regs; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx reg = gen_rtx_REG (SImode, i);
                dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
                j++;
              }
          REG_NOTES (tmp) = dwarf;
        }
      arm_add_cfa_adjust_cfa_note (tmp, amount,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (IS_CMSE_ENTRY (func_type))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      /* Clear all caller-saved regs that are not used to return.  */
      cmse_nonsecure_entry_clear_before_return ();

      /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
         VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE)
        {
          rtx_insn *insn;

          insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
                                                   GEN_INT (FPCXTNS_ENUM)));
          rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  4));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (arm_current_function_pac_enabled_p ())
    emit_insn (gen_aut_nop ());

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
              == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
        {
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
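/* Worked example for the <reg> + <reg> case above (illustrative): a
   doubleword load into r2:r3 from [r3 + r4] comes out as

        add     r3, r3, r4
        ldr     r2, [r3, #0]
        ldr     r3, [r3, #4]

   The sum is built in the high destination register so that the final
   load is the one that overwrites it.  */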
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
        std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
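/* Note on the three swaps in the n == 3 case: they form a minimal
   sorting network.  E.g. an operand order (r5, r2, r4) becomes
   (r2, r4, r5), satisfying ldmia/stmia's requirement that register
   lists be in ascending order.  */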
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);

  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_cpymemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_cpymem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_cpymem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.cc needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
               current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_size:\t%d\n",
               current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_line_size:\t%d\n",
               current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_constant_pool:\t%d\n",
               (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
               current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
               current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
               current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
               current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_ldrd_strd:\t%d\n",
               (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "logical_op_non_short_circuit:\t[%d,%d]\n",
               (int) current_tune->logical_op_non_short_circuit_thumb,
               (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "disparage_flag_setting_t16_encodings:\t%d\n",
               (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "string_ops_prefer_neon:\t%d\n",
               (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_inline_memset:\t%d\n",
               current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
               current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
               (int) current_tune->sched_autopref);
}
/* The last set of target options used to emit .arch directives, etc.  This
   could be a function-local static if it were not required to expose it as a
   root to the garbage collector.  */
static GTY(()) cl_target_option *last_asm_targ_options = NULL;

/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
{
  arm_build_target build_target;
  /* If the target options haven't changed since the last time we were called
     there is nothing to do.  This should be sufficient to suppress the
     majority of redundant work.  */
  if (last_asm_targ_options == targ_options)
    return;

  last_asm_targ_options = targ_options;

  build_target.isa = sbitmap_alloc (isa_num_bits);
  arm_configure_build_target (&build_target, targ_options, false);

  if (build_target.core_name
      && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
    {
      const char* truncated_name
        = arm_rewrite_selected_cpu (build_target.core_name);
      asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
    }

  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
                                  build_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  if (strcmp (build_target.arch_name, "armv7ve") == 0)
    {
      /* Keep backward compatibility for assemblers which don't support
         armv7ve.  Fortunately, none of the following extensions are reset
         by a .fpu directive.  */
      asm_fprintf (stream, "\t.arch armv7-a\n");
      asm_fprintf (stream, "\t.arch_extension virt\n");
      asm_fprintf (stream, "\t.arch_extension idiv\n");
      asm_fprintf (stream, "\t.arch_extension sec\n");
      asm_fprintf (stream, "\t.arch_extension mp\n");
    }
  else
    asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);

  /* The .fpu directive will reset any architecture extensions from the
     assembler that relate to the fp/vector extensions.  So put this out before
     any .arch_extension directives.  */
  const char *fpu_name = (TARGET_SOFT_FLOAT
                          ? "softvfp"
                          : arm_identify_fpu_from_isa (build_target.isa));
  asm_fprintf (stream, "\t.fpu %s\n", fpu_name);

  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
        {
          arm_initialize_isa (opt_bits, opt->isa_bits);

          /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
             "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
             floating point instructions are disabled.  So the following check
             restricts the printing of ".arch_extension mve" and
             ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
             this special behaviour because the feature bits "mve" and
             "mve_float" are not part of "fpu bits", so they are not cleared
             when -mfloat-abi=soft (i.e. nofp) but the macros TARGET_HAVE_MVE
             and TARGET_HAVE_MVE_FLOAT are disabled.  */
          if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
              || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
                  && !TARGET_HAVE_MVE_FLOAT))
            continue;

          /* If every feature bit of this option is set in the target ISA
             specification, print out the option name.  However, don't print
             anything if all the bits are part of the FPU specification.  */
          if (bitmap_subset_p (opt_bits, build_target.isa)
              && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
            asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
        }
    }
}
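/* Illustrative output for a hypothetical armv7ve target whose assembler
   lacks armv7ve support (the .fpu name here is a placeholder):

        .arch armv7-a
        .arch_extension virt
        .arch_extension idiv
        .arch_extension sec
        .arch_extension mp
        .fpu vfpv4

   Any extensions printed by the loop above would follow the .fpu line,
   since .fpu resets the assembler's fp-related extension state.  */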
static void
arm_file_start (void)
{
  int val;
  bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
  bool bti = (aarch_enable_bti == 1);

  arm_print_asm_arch_directives
    (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));

  if (TARGET_BPABI)
    {
      /* If we have a named cpu, but the assembler does not support that
         name via .cpu, put out a cpu name attribute; but don't do this if the
         name starts with the fictitious prefix, 'generic'.  */
      if (arm_active_target.core_name
          && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
          && !startswith (arm_active_target.core_name, "generic"))
        {
          const char* truncated_name
            = arm_rewrite_selected_cpu (arm_active_target.core_name);
          if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
            asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
                         truncated_name);
        }

      if (print_tune_info)
        arm_print_tune_info ();

      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
        arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

      if (TARGET_HARD_FLOAT_ABI)
        arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);

      /* Some of these attributes only apply when the corresponding features
         are used.  However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
        arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
        {
          arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
          arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
        }

      if (flag_signaling_nans)
        arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
                               flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
                               flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
                               unaligned_access);

      if (arm_fp16_format)
        arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
                                 (int) arm_fp16_format);

      if (TARGET_HAVE_PACBTI)
        {
          arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
          arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
        }
      else if (pac || bti)
        {
          arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
          arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
        }

      if (bti)
        arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
      if (pac)
        arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
    }

  if (arm_lang_output_object_attributes_hook)
    arm_lang_output_object_attributes_hook();

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  /* Just in case the last function output in the assembler had non-default
     architecture directives, we force the assembler state back to the default
     set, so that any 'calculated' build attributes are based on the default
     options rather than the special options for that function.  */
  arm_print_asm_arch_directives
    (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Define classes to generate code as RTL or output asm to a file.
   Using templates then allows to use the same code to output code
   sequences in the two formats.  */
class thumb1_const_rtl
{
 public:
  thumb1_const_rtl (rtx dst) : dst (dst) {}

  void mov (HOST_WIDE_INT val)
  {
    emit_set_insn (dst, GEN_INT (val));
  }

  void add (HOST_WIDE_INT val)
  {
    emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
  }

  void ashift (HOST_WIDE_INT shift)
  {
    emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
  }

  void neg ()
  {
    emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
  }

 private:
  rtx dst;
};

class thumb1_const_print
{
 public:
  thumb1_const_print (FILE *f, int regno)
  {
    t_file = f;
    dst_regname = reg_names[regno];
  }

  void mov (HOST_WIDE_INT val)
  {
    asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
                 dst_regname, val);
  }

  void add (HOST_WIDE_INT val)
  {
    asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
                 dst_regname, val);
  }

  void ashift (HOST_WIDE_INT shift)
  {
    asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
                 dst_regname, shift);
  }

  void neg ()
  {
    asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
  }

 private:
  FILE *t_file;
  const char *dst_regname;
};
/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
   Avoid generating useless code when one of the bytes is zero.  */
template <class T>
static void
thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
{
  bool mov_done_p = false;
  unsigned HOST_WIDE_INT val = op1;
  int shift = 0;
  int i;

  gcc_assert (op1 == trunc_int_for_mode (op1, SImode));

  if (val <= 255)
    {
      dst.mov (val);
      return;
    }

  /* For negative numbers with the first nine bits set, build the
     opposite of OP1, then negate it, it's generally shorter and not
     longer.  */
  if ((val & 0xFF800000) == 0xFF800000)
    {
      thumb1_gen_const_int_1 (dst, -op1);
      dst.neg ();
      return;
    }

  /* In the general case, we need 7 instructions to build
     a 32 bits constant (1 movs, 3 lsls, 3 adds).  We can
     do better if VAL is small enough, or
     right-shiftable by a suitable amount.  If the
     right-shift enables to encode at least one less byte,
     it's worth it: we save a adds and a lsls at the
     expense of a final lsls.  */
  int final_shift = number_of_first_bit_set (val);

  int leading_zeroes = clz_hwi (val);
  int number_of_bytes_needed
    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
       / BITS_PER_UNIT) + 1;
  int number_of_bytes_needed2
    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
       / BITS_PER_UNIT) + 1;

  if (number_of_bytes_needed2 < number_of_bytes_needed)
    val >>= final_shift;
  else
    final_shift = 0;

  /* If we are in a very small range, we can use either a single movs
     or movs+adds.  */
  if (val <= 510)
    {
      if (val > 255)
        {
          unsigned HOST_WIDE_INT high = val - 255;

          dst.mov (high);
          dst.add (255);
        }
      else
        dst.mov (val);

      if (final_shift > 0)
        dst.ashift (final_shift);
    }
  else
    {
      /* General case, emit upper 3 bytes as needed.  */
      for (i = 0; i < 3; i++)
        {
          unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;

          if (byte)
            {
              /* We are about to emit new bits, stop accumulating a
                 shift amount, and left-shift only if we have already
                 emitted some upper bits.  */
              if (mov_done_p)
                {
                  dst.ashift (shift);
                  dst.add (byte);
                }
              else
                dst.mov (byte);

              /* Stop accumulating shift amount since we've just
                 emitted some bits.  */
              shift = 0;

              mov_done_p = true;
            }

          if (mov_done_p)
            shift += 8;
        }

      /* Emit lower byte.  */
      if (!mov_done_p)
        dst.mov (val & 0xff);
      else
        {
          dst.ashift (shift);
          if (val & 0xff)
            dst.add (val & 0xff);
        }

      if (final_shift > 0)
        dst.ashift (final_shift);
    }
}

/* Proxies for thumb1.md, since the thumb1_const_print and
   thumb1_const_rtl classes are not exported.  */
void
thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
{
  thumb1_const_rtl t (dst);
  thumb1_gen_const_int_1 (t, op1);
}

void
thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
{
  thumb1_const_print t (asm_out_file, REGNO (dst));
  thumb1_gen_const_int_1 (t, op1);
}
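/* Worked example (illustrative): op1 == 0x1200ff goes through the
   general case and emits

        movs    rN, #0x12       @ highest non-zero byte
        lsls    rN, #16         @ one shift covers the zero byte too
        adds    rN, #0xff       @ low byte

   three instructions instead of shifting and adding byte by byte.  */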
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int this_regno
    = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
       ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);

          /* With -mpure-code, we cannot load the address from the
             constant pool: we build it explicitly.  */
          if (target_pure_code)
            {
              fputs ("\tmovs\tr3, #:upper8_15:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
              fputs ("\tlsls r3, #8\n", file);
              fputs ("\tadds\tr3, #:upper0_7:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
              fputs ("\tlsls r3, #8\n", file);
              fputs ("\tadds\tr3, #:lower8_15:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
              fputs ("\tlsls r3, #8\n", file);
              fputs ("\tadds\tr3, #:lower0_7:#", file);
              assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
              fputc ('\n', file);
            }
          else
            fputs ("\tldr\tr3, ", file);
        }
      else
        fputs ("\tldr\tr12, ", file);

      if (!target_pure_code)
        {
          assemble_name (file, label);
          fputc ('\n', file);
        }

      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          /* With -mpure-code, we cannot load MI_DELTA from the
             constant pool: we build it explicitly.  */
          if (target_pure_code)
            {
              thumb1_const_print r3 (file, 3);
              thumb1_gen_const_int_1 (r3, mi_delta);
            }
          else
            {
              fputs ("\tldr\tr3, ", file);
              assemble_name (file, label);
              fputs ("+4\n", file);
            }
          asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          /* Thumb1 unified syntax requires s suffix in instruction name when
             one of the operands is immediate.  */
          asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      int shift = 0;

      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");

      /* With -mpure-code, we don't need to emit literals for the
         function address and delta since we emitted code to build
         them.  */
      if (!target_pure_code)
        {
          ASM_OUTPUT_ALIGN (file, 2);
          assemble_name (file, label);
          fputs (":\n", file);
          if (flag_pic)
            {
              /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
              rtx tem = XEXP (DECL_RTL (function), 0);
              /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
                 pipeline offset is four rather than eight.  Adjust the offset
                 accordingly.  */
              tem = plus_constant (GET_MODE (tem), tem,
                                   TARGET_THUMB1_ONLY ? -3 : -7);
              tem = gen_rtx_MINUS (GET_MODE (tem),
                                   tem,
                                   gen_rtx_SYMBOL_REF (Pmode,
                                                       ggc_strdup (labelpc)));
              assemble_integer (tem, 4, BITS_PER_WORD, 1);
            }
          else
            /* Output ".word .LTHUNKn".  */
            assemble_integer (XEXP (DECL_RTL (function), 0), 4,
                              BITS_PER_WORD, 1);

          if (TARGET_THUMB1_ONLY && mi_delta > 255)
            assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
        }
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  const bool long_call_p = arm_is_long_call_p (function);

  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  if (long_call_p)
    {
      emit_move_insn (temp, funexp);
      funexp = temp;
    }
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Indirect calls require a bit of fixup in PIC mode.  */
  if (long_call_p)
    split_all_insns_noflow ();

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
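
/* Rough C-level sketch of what the thunk above computes (not from the
   original source, and glossing over ABI details):

     this += DELTA;
     if (VCALL_OFFSET != 0)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;    // tail call with the adjusted this pointer

   with "this" living in r0 or r1 as selected by this_regno above.  */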
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));

  assemble_start_function (thunk, fnname);
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
  assemble_end_function (thunk, fnname);
}
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:         gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    const function_arg_info &arg,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
	  && (nregs & 1))
	{
	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", arg.type);
	  else if (res > 0)
	    {
	      nregs++;
	      if (res > 1 && warn_psabi)
		inform (input_location,
			"parameter passing for argument of type "
			"%qT changed in GCC 9.1", arg.type);
	    }
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
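
/* Worked example (not from the original source): for "int f (int a, ...)"
   the single named argument occupies r0, so nregs == 1 and *pretend_size
   becomes (4 - 1) * 4 = 12, making the prologue push r1-r3 so that the
   anonymous arguments sit contiguously below the caller's stack args.  */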
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
      || mode == V8BFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  if (TARGET_HAVE_MVE
      && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
    return true;

  if (TARGET_HAVE_MVE_FLOAT
      && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
      && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_HFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  if (TARGET_HAVE_MVE)
    switch (mode)
      {
      case E_QImode:
	return V16QImode;
      case E_HImode:
	return V8HImode;
      case E_SImode:
	return V4SImode;

      default:;
      }

  if (TARGET_HAVE_MVE_FLOAT)
    switch (mode)
      {
      case E_HFmode:
	return V8HFmode;
      case E_SFmode:
	return V4SFmode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return default_class_likely_spilled_p (rclass);
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
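
/* Example (not from the original source): the mask of 255 tells the
   middle-end that an SImode shift such as "x << 257" may be treated as
   "x << 1", because the hardware only looks at the low 8 bits of the
   shift amount; for DImode the mask of 0 promises nothing, matching the
   library implementations.  */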
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_debugger_regno (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  if (IS_PAC_REGNUM (regno))
    return DWARF_PAC_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
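
/* Example (not from the original source): a DFmode value living in d0 is
   described to DWARF as (parallel [(reg:SI s0) (reg:SI s1)]) on a
   little-endian target, matching the legacy S-register numbering chosen
   in the comment above; D16-D31 have no single-precision aliases and are
   described as whole DImode registers instead.  */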
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16 || IS_PAC_REGNUM (reg))
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else if (IS_PAC_REGNUM (reg))
	asm_fprintf (asm_out_file, "ra_auth_code");
      else
	asm_fprintf (out_file, "%r", reg);

      if (flag_checking)
	{
	  /* Check that the addresses are consecutive.  */
	  e = XEXP (SET_DEST (e), 0);
	  if (GET_CODE (e) == PLUS)
	    gcc_assert (REG_P (XEXP (e, 0))
			&& REGNO (XEXP (e, 0)) == SP_REGNUM
			&& CONST_INT_P (XEXP (e, 1))
			&& offset == INTVAL (XEXP (e, 1)));
	  else
	    gcc_assert (i == 1
			&& REG_P (e)
			&& REGNO (e) == SP_REGNUM);
	  offset += reg_size;
	}
    }

  fprintf (out_file, "}\n");
  if (padfirst)
    fprintf (out_file, "\t.pad #%d\n", padfirst);
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
	{
	  if (cfun->machine->pacspval_needed)
	    asm_fprintf (out_file, "\t.pacspval\n");
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx
			|| IS_PAC_REGNUM (REGNO (src)));
	    reg = REGNO (dest);

	    if (IS_PAC_REGNUM (REGNO (src)))
	      arm_unwind_emit_set (out_file, PATTERN (insn));
	    else
	      asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			   reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_GD32_FDPIC:
      fputs ("(tlsgd_fdpic)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDM32_FDPIC:
      fputs ("(tlsldm_fdpic)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_IE32_FDPIC:
      fputs ("(gottpoff_fdpic)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
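
/* Illustrative note (not from the original source): shift_op returns the
   mnemonic for operands[3] (for instance "lsl" for an ASHIFT), so a
   constant left shift produces a template along the lines of
   "lsl%.\tr0, r1, #3", with the '?'/'.'/'!' modifier selected above
   controlling conditionalisation and flag setting; a plain register move
   is emitted when no real shift is needed.  */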
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  switch (GET_MODE (operands[0]))
    {
    case E_V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case E_V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case E_V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
      break;
    }
  output_asm_insn (templ, operands);

  return "";
}
/* Output an arm casesi dispatch sequence.  Used by arm_casesi_internal insn.
   Responsible for the handling of switch statements in arm.  */

const char *
arm_output_casesi (rtx *operands)
{
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
	output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
      else
	output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
      break;
    case E_HImode:
      if (REGNO (operands[4]) != REGNO (operands[5]))
	{
	  output_asm_insn ("add\t%4, %0, %0", operands);
	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
	    output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
	  else
	    output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
	}
      else
	{
	  output_asm_insn ("add\t%4, %5, %0", operands);
	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
	    output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
	  else
	    output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
	}
      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
      break;
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%|pc, %|pc, %4", operands);
	}
      else
	output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
      break;
    default:
      gcc_unreachable ();
    }
  assemble_label (asm_out_file, label);
  output_asm_insn ("nop", operands);
  return "";
}
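
/* For reference (not from the original source): with a QImode dispatch
   table the sequence above assembles to roughly

	cmp	r0, #<ncases - 1>
	bhi	.Ldefault
	ldrb	r4, [r5, r0]
	add	pc, pc, r4, lsl #2
   .LrtxN:
	nop

   with the table of scaled offsets following .LrtxN.  */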
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
static int
arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
  if (DEBUG_INSN_P (insn))
    return more;

  rtx_code code = GET_CODE (PATTERN (insn));
  if (code == USE || code == CLOBBER)
    return more;

  if (get_attr_type (insn) == TYPE_NO_INSN)
    return more;

  return more - 1;
}
/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision floating point types.  */
  if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
    {
      if (TYPE_MAIN_VARIANT (type) == float16_type_node)
	return NULL;
      if (TYPE_MODE (type) == BFmode)
	return "u6__bf16";
      else
	return "Dh";
    }

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
   All modes except THUMB1 have conditional execution.
   If we have conditional arithmetic, return false before reload to
   enable some ifcvt transformations.  */
static bool
arm_have_conditional_execution (void)
{
  bool has_cond_exec, enable_ifcvt_trans;

  /* Only THUMB1 cannot support conditional execution.  */
  has_cond_exec = !TARGET_THUMB1;

  /* Enable ifcvt transformations if we have conditional arithmetic, but only
     before reload.  */
  enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;

  return has_cond_exec && !enable_ifcvt_trans;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_modes (vector_modes *modes, bool)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V8QImode);
    }
  return 0;
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_VFP_BASE)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
      if (TARGET_HAVE_MVE)
	fixed_regs[VPR_REGNUM] = 0;
    }

  if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }

  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  /* The Q and GE bits are only accessed via special ACLE patterns.  */
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
     list is 8-bit.  Normally this means all registers in the list must be
     LO_REGS, that is (R0 -R7).  If any HI_REGS used, then we must use 32-bit
     encodings.  There is one exception for PUSH that LR in HI_REGS can be used
     with 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
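
/* Example (not from the original source): under Thumb-2,
   "push {r0-r7, lr}" keeps the 16-bit encoding (length 2), while
   "push {r0, r8}" forces the 32-bit encoding (length 4) because r8 is a
   high register other than LR.  */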
/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
   rtx, RETURN_PC is true if OPERANDS contains return insn.  WRITE_BACK_P is
   true if OPERANDS contains insn which explicit updates base register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is SP
     and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}

/* Same as above, but operands are a register/memory pair in SImode.
   Assumes operands has the base register in position 0 and memory in position
   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  int count;
  rtx ops[2];
  int regnum, memnum;
  if (load)
    regnum = 0, memnum = 1;
  else
    regnum = 1, memnum = 0;
  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
  ops[memnum] = adjust_address (operands[2], DImode, 0);
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}

/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
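
/* Worked example (not from the original source): for the constant 0.25
   the exact inverse is 4.0 = 2^2, so vfp3_const_double_for_fract_bits
   returns 2, allowing a multiply by 0.25 to map onto a fixed-point vcvt
   with 2 fraction bits; for 16.0, vfp3_const_double_for_bits returns 4.  */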
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    cmp_mode = E_SImode;
  else
    cmp_mode = CC_Zmode;

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
					    oldval, newval, is_weak, mod_s,
					    mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (ie. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s_rtx = operands[6];
  mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	{
	  rtx src = rval;
	  if (!satisfies_constraint_L (oldval))
	    {
	      gcc_assert (satisfies_constraint_J (oldval));

	      /* For such immediates, ADDS needs the source and destination regs
		 to be the same.

		 Normally this would be handled by RA, but this is all happening
		 after RA.  */
	      emit_move_insn (neg_bval, rval);
	      src = neg_bval;
	    }

	  emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
						       label2, cond));
	}
      else
	{
	  emit_move_insn (neg_bval, const1_rtx);
	  emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
	}
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = gen_int_mode (-INTVAL (value), wmode);
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
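/* Editor's note (illustrative sketch, not part of the original sources):
   for a 32-bit atomic fetch-and-add with a relaxed memory model, the
   splitter above produces a load-exclusive/store-exclusive retry loop of
   roughly this shape, where r_old, r_new and r_cond stand for OLD_OUT,
   NEW_OUT and COND:

	.Lretry:
	     ldrex   r_old, [mem]
	     add     r_new, r_old, r_value
	     strex   r_cond, r_new, [mem]
	     cmp     r_cond, #0
	     bne     .Lretry

   With TARGET_HAVE_LDACQ and a stronger model, LDREX/STREX become
   LDAEX/STLEX; otherwise arm_pre_atomic_barrier/arm_post_atomic_barrier
   bracket the loop with explicit barriers.  */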
/* Return the mode for the MVE vector of predicates corresponding to MODE.  */

opt_machine_mode
arm_mode_to_pred_mode (machine_mode mode)
{
  switch (GET_MODE_NUNITS (mode))
    {
    case 16: return V16BImode;
    case 8: return V8BImode;
    case 4: return V4BImode;
    case 2: return V2QImode;
    }
  return opt_machine_mode ();
}
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
   If CAN_INVERT, store either the result or its inverse in TARGET
   and return true if TARGET contains the inverse.  If !CAN_INVERT,
   always store the result in TARGET, never its inverse.

   Note that the handling of floating-point comparisons is not
   IEEE compliant.  */

bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
			   bool can_invert)
{
  machine_mode cmp_result_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (op0);

  bool inverted;

  /* MVE supports more comparisons than Neon.  */
  if (TARGET_HAVE_MVE)
    inverted = false;
  else
    switch (code)
      {
      /* For these we need to compute the inverse of the requested
	 comparison.  */
      case UNORDERED:
      case UNLT:
      case UNLE:
      case UNGT:
      case UNGE:
      case UNEQ:
      case NE:
	code = reverse_condition_maybe_unordered (code);
	if (!can_invert)
	  {
	    /* Recursively emit the inverted comparison into a temporary
	       and then store its inverse in TARGET.  This avoids reusing
	       TARGET (which for integer NE could be one of the inputs).  */
	    rtx tmp = gen_reg_rtx (cmp_result_mode);
	    if (arm_expand_vector_compare (tmp, code, op0, op1, true))
	      gcc_unreachable ();
	    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
	    return false;
	  }
	inverted = true;
	break;

      default:
	inverted = false;
	break;
      }

  switch (code)
    {
    /* These are natively supported by Neon for zero comparisons, but otherwise
       require the operands to be swapped.  For MVE, we can only compare
       registers.  */
    case LE:
    case LT:
      if (!TARGET_HAVE_MVE)
	if (op1 != CONST0_RTX (cmp_mode))
	  {
	    code = swap_condition (code);
	    std::swap (op0, op1);
	  }
      /* Fall through.  */

    /* These are natively supported by Neon for both register and zero
       operands.  MVE supports registers only.  */
    case EQ:
    case GE:
    case GT:
    case NE:
      if (TARGET_HAVE_MVE)
	{
	  switch (GET_MODE_CLASS (cmp_mode))
	    {
	    case MODE_VECTOR_INT:
	      emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
					op0, force_reg (cmp_mode, op1)));
	      break;
	    case MODE_VECTOR_FLOAT:
	      if (TARGET_HAVE_MVE_FLOAT)
		emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
					    op0, force_reg (cmp_mode, op1)));
	      else
		gcc_unreachable ();
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
      return inverted;

    /* These are natively supported for register operands only.
       Comparisons with zero aren't useful and should be folded
       or canonicalized by target-independent code.  */
    case GEU:
    case GTU:
      if (TARGET_HAVE_MVE)
	emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
				  op0, force_reg (cmp_mode, op1)));
      else
	emit_insn (gen_neon_vc (code, cmp_mode, target,
				op0, force_reg (cmp_mode, op1)));
      return inverted;

    /* These require the operands to be swapped and likewise do not
       support comparisons with zero.  */
    case LEU:
    case LTU:
      if (TARGET_HAVE_MVE)
	emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
				  force_reg (cmp_mode, op1), op0));
      else
	emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
				target, force_reg (cmp_mode, op1), op0));
      return inverted;

    /* These need a combination of two comparisons.  */
    case LTGT:
    case ORDERED:
      {
	/* Operands are LTGT iff (a > b || a < b).
	   Operands are ORDERED iff (a > b || a <= b).  */
	rtx gt_res = gen_reg_rtx (cmp_result_mode);
	rtx alt_res = gen_reg_rtx (cmp_result_mode);
	rtx_code alt_code = (code == LTGT ? LT : LE);
	if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
	    || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
	  gcc_unreachable ();
	emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
						     gt_res, alt_res)));
	return inverted;
      }

    default:
      gcc_unreachable ();
    }
}
/* Expand a vcond or vcondu pattern with operands OPERANDS.
   CMP_RESULT_MODE is the mode of the comparison result.  */

void
arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
  /* When expanding for MVE, we do not want to emit a (useless) vpsel in
     arm_expand_vector_compare, and another one here.  */
  rtx mask;

  if (TARGET_HAVE_MVE)
    mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
  else
    mask = gen_reg_rtx (cmp_result_mode);

  bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
					     operands[4], operands[5], true);
  if (inverted)
    std::swap (operands[1], operands[2]);
  if (TARGET_NEON)
    emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
			      mask, operands[1], operands[2]));
  else
    {
      machine_mode cmp_mode = GET_MODE (operands[0]);

      switch (GET_MODE_CLASS (cmp_mode))
	{
	case MODE_VECTOR_INT:
	  emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
				operands[1], operands[2], mask));
	  break;
	case MODE_VECTOR_FLOAT:
	  if (TARGET_HAVE_MVE_FLOAT)
	    emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
				    operands[1], operands[2], mask));
	  else
	    gcc_unreachable ();
	  break;
	default:
	  gcc_unreachable ();
	}
    }
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  mask = gen_const_vec_duplicate (vmode, mask);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
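/* Editor's note (illustrative, not part of the original sources):
   VEC_PERM_EXPR requires selector elements to be reduced modulo the number
   of input elements, whereas VTBL writes zero for out-of-range byte
   indices.  With a single V8QI input the AND above maps, for example, a
   selector byte of 11 to 11 & 7 == 3, selecting lane 3 instead of
   producing zero.  */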
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}
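/* Editor's sketch (hypothetical helper, not built): a host-side model of
   the mapping above.  For a 16-byte vector of 4 elements on big-endian,
   lane 0 maps to (4 - 1 - 0) ^ 2 == 1, i.e. lanes are reversed within each
   D register and the two D registers are swapped.  */
#if 0
static int
model_endian_lane_map (int nelems, int vec_bytes, int lane)
{
  lane = nelems - 1 - lane;	/* Reverse lane order.  */
  if (vec_bytes == 16)
    lane ^= nelems / 2;		/* Swap the two D registers.  */
  return lane;
}
#endif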
/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    return
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
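/* Editor's note (worked example, not part of the original sources): on
   little-endian with two distinct V4HI operands, the loop above accepts
   { 0, 2, 4, 6 } (odd == 0) and { 1, 3, 5, 7 } (odd == 1), the two outputs
   of a VUZP, which de-interleaves the even and odd lanes.  */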
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen) (machine_mode, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	case E_V8HFmode:
	case E_V4HFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev16;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V4SImode:
	case E_V2SImode:
	case E_V4SFmode:
	case E_V2SFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->vmode, d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
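/* Editor's note (worked example, not part of the original sources): for
   two V4HI inputs the loop above accepts { 0, 4, 2, 6 } (odd == 0) and
   { 1, 5, 3, 7 } (odd == 1), the two outputs of a VTRN, which exchanges
   elements across each even/odd lane pair of the two vectors.  */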
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);

  if (d->vmode == E_DImode)
    return false;

  emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
  return true;
}
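/* Editor's note (worked example, not part of the original sources): on
   two V4SI operands the selector { 1, 2, 3, 4 } is accepted here with
   location == 1, i.e. a VEXT that concatenates the top three elements of
   the first vector with the first element of the second.  */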
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      d->perm.rotate_inputs (1);
      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
			      rtx target, rtx op0, rtx op1,
			      const vec_perm_indices &sel)
{
  if (vmode != op_mode)
    return false;

  struct expand_vec_perm_d d;
  int i, nelt, which;

  if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
    return false;

  d.target = target;
  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
	op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = !target;

  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);

  if (!d.testing_p)
    return arm_expand_vec_perm_const_1 (&d);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  bool ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || SUBREG_P (out))
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || SUBREG_P (in))
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || SUBREG_P (amount))
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this (using ARM register-shift semantics, where an
	 out-of-range amount yields zero):

	 ASHIFT:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 ASHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount >= 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 LSHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
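/* Editor's sketch (hypothetical helper, not built): a host-side reference
   for the LSHIFTRT decomposition emitted above, valid for amounts 0-63.
   Each sub-word term mirrors an ARM register shift, which yields zero once
   the amount moves out of range.  */
#if 0
static unsigned long long
model_lshiftrt_64 (unsigned long long in, unsigned int amount)
{
  unsigned int lo = (unsigned int) in;
  unsigned int hi = (unsigned int) (in >> 32);

  /* out_down = in_down >> amount (zero when amount >= 32).  */
  unsigned int down = (amount < 32 ? lo >> amount : 0);
  /* out_down |= in_up << (32 - amount), meaningful for 0 < amount < 32.  */
  if (amount > 0 && amount < 32)
    down |= hi << (32 - amount);
  /* out_down |= in_up >> (amount - 32), meaningful for amount >= 32.  */
  if (amount >= 32)
    down |= hi >> (amount - 32);
  /* out_up = in_up >> amount (zero when amount >= 32).  */
  unsigned int up = (amount < 32 ? hi >> amount : 0);

  return ((unsigned long long) up << 32) | down;
}
#endif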
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.

   In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
   unsigned range of 0 <= A < 256 as described in the AAELF32
   relocation handling documentation: REL-type relocations are encoded
   as unsigned in this case.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (target_word_relocations)
    return false;

  if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	{
	  if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
	    return IN_RANGE (INTVAL (xop1), 0, 0xff);
	  else
	    return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
	}
    }

  return false;
}
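/* Editor's note (illustrative, not part of the original sources): accepted
   forms include (symbol_ref "x"), (label_ref ...) and
   (const (plus (symbol_ref "x") (const_int 12))); the addend must lie in
   [-32768, 32767] for the MOVW/MOVT case and in [0, 255] for the Thumb-1
   upper/lower relocation case.  */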
/* Return true if *COMPARISON is a valid comparison operation, and put the
   operands *OP1 and *OP2 into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      /* gen_compare_reg() will sort out any invalid operands.  */
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
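/* Editor's note (worked example, not part of the original sources): for a
   15-byte block set with word stores (neither UNALIGNED_P nor USE_STRD_P),
   NUM is arm_const_inline_cost for the value plus (15 >> 2) == 3 word
   stores plus leftover[15 & 3] == 2 trailing stores; when unaligned access
   is available the final STRH/STRB pair is merged into one STR, hence the
   decrement above.  */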
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If at least nelt_v8 bytes are left over, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storev8qi (mem, reg));
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_insn (gen_unaligned_storev8qi (mem, reg));
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	    emit_move_insn (mem, reg);
	  else
	    emit_insn (gen_unaligned_storedi (mem, reg));
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
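/* Editor's note (illustrative, not part of the original sources): the
   setmem expander supplies the operands in the order destination MEM,
   byte count, fill value and alignment, so a memset (p, 1, 15) with a
   word-aligned P arrives here as OPERANDS[1] == GEN_INT (15),
   OPERANDS[2] == GEN_INT (1) and OPERANDS[3] == GEN_INT (4).  */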
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
					(const_int 16)
					(const_int 16))
		     (const_int imm16_1))
     or
     prev (movw) == (set (reg r1)
			  (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
			 (lo_sum (reg r1)
				 (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
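/* Editor's note (illustrative, not part of the original sources): a
   fusible pair materialising the address of SYM looks like

	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM

   which corresponds to the HIGH/LO_SUM form above once both sets target
   the same register; the ZERO_EXTRACT form covers the analogous pair of
   16-bit immediate halves.  */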
bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  /* We need both real sets.  */
  if (!prev_set || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Return TRUE if return address signing mechanism is enabled.  */
bool
arm_current_function_pac_enabled_p (void)
{
  return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
	  || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
	      && !crtl->is_leaf));
}
/* Raise an error if the current target arch is not bti compatible.  */
void aarch_bti_arch_check (void)
{
  if (!arm_arch8m_main)
    error ("This architecture does not support branch protection instructions");
}

/* Return TRUE if Branch Target Identification Mechanism is enabled.  */
bool
aarch_bti_enabled (void)
{
  return aarch_enable_bti != 0;
}
/* Check if INSN is a BTI J insn.  */
bool
aarch_bti_j_insn_p (rtx_insn *insn)
{
  if (!insn || !INSN_P (insn))
    return false;

  rtx pat = PATTERN (insn);
  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
}
/* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction.  */
bool
aarch_pac_insn_p (rtx x)
{
  if (!x || !INSN_P (x))
    return false;

  rtx pat = PATTERN (x);

  if (GET_CODE (pat) == SET)
    {
      rtx tmp = XEXP (pat, 1);
      if (tmp
	  && ((GET_CODE (tmp) == UNSPEC
	       && XINT (tmp, 1) == UNSPEC_PAC_NOP)
	      || (GET_CODE (tmp) == UNSPEC_VOLATILE
		  && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
	return true;
    }

  return false;
}
/* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
   For Arm, both of these map to a simple BTI instruction.  */

rtx
aarch_gen_bti_c (void)
{
  return gen_bti_nop ();
}

rtx
aarch_gen_bti_j (void)
{
  return gen_bti_nop ();
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_SDIV:
    case TYPE_UDIV:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_FSQRTS:
    case TYPE_FSQRTD:
    case TYPE_NEON_FP_SQRT_S:
    case TYPE_NEON_FP_SQRT_D:
    case TYPE_NEON_FP_SQRT_S_Q:
    case TYPE_NEON_FP_SQRT_D_Q:
    case TYPE_NEON_FP_DIV_S:
    case TYPE_NEON_FP_DIV_D:
    case TYPE_NEON_FP_DIV_S_Q:
    case TYPE_NEON_FP_DIV_D_Q:
      return false;
    default:
      return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */
static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */
void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);

  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
			  : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
			  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, false);
  arm_configure_build_target (&callee_target, callee_opts, false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      argstr = NULL;
      if (!strcmp (q, "thumb"))
	{
	  opts->x_target_flags |= MASK_THUMB;
	  if (TARGET_FDPIC && !arm_arch_thumb2)
	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
	}

      else if (!strcmp (q, "arm"))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strcmp (q, "general-regs-only"))
	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;

      else if (startswith (q, "fpu="))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for target attribute or pragma %qs", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else if (startswith (q, "arch="))
	{
	  char *arch = q + 5;
	  const arch_option *arm_selected_arch
	    = arm_parse_arch_option_name (all_architectures, "arch", arch);

	  if (!arm_selected_arch)
	    {
	      error ("invalid architecture for target attribute or pragma %qs",
		     q);
	      return false;
	    }

	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
	}
      else if (q[0] == '+')
	{
	  opts->x_arm_arch_string
	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
	}
      else
	{
	  error ("unknown target attribute or pragma %qs", q);
	  return false;
	}
    }

  return true;
}
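/* Illustrative sketch, not part of the build: the attribute argument is
   a comma-separated list of the keywords parsed above, so a declaration
   such as

     __attribute__ ((target ("thumb,fpu=vfpv3-d16")))
     void f (void);

   takes two trips round the strtok loop: the first sets MASK_THUMB, the
   second resolves "vfpv3-d16" through OPT_mfpu_.  ("vfpv3-d16" is just
   one example of a valid -mfpu= enumeration value.)  */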
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts, opts_set);
  arm_configure_build_target (&arm_active_target, &cl_opts, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts, opts_set);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternately on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
      || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options, func_options_set;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);
  memset (&func_options_set, 0, sizeof (func_options_set));

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options, &func_options_set,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options, &func_options_set,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &func_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options, &func_options_set);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */

static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits_internal);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  arm_print_asm_arch_directives (stream, targ_options);

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (REG_P (src) && MEM_P (dest))
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (MEM_P (src) && REG_P (dest))
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
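/* Illustrative sketch, not part of the build: a load such as

     (set (reg:SI r0) (mem:SI (plus:SI (reg:SI r1) (const_int 4))))

   yields *base = r1, *offset = (const_int 4) and *is_load = true, while
   a bare register address like (mem:SI (reg:SI r1)) comes back with
   *offset = (const_int 0) via extract_base_offset_in_addr.  */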
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
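/* Illustrative note, not part of the build: for two loads from the same
   base, say ldr r2, [r1, #4] and ldr r3, [r1, #8], both get the same
   FUSION_PRI (loads rank ahead of stores), and the #4 access ends up
   with the larger PRI because its smaller offset is subtracted from
   TMP, so the scheduler keeps the pair adjacent and in offset order.  */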
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
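/* Illustrative sketch, not part of the build: for V4SImode,
   arm_simd_vect_par_cnst_half (V4SImode, false) builds

     (parallel [(const_int 0) (const_int 1)])   // little-endian
     (parallel [(const_int 2) (const_int 3)])   // big-endian

   matching the "Low Mask" row of the diagram above.  */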
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half_p for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;
      if (flags & SECTION_WRITE)
	*num |= 0x1;
      if (flags & SECTION_CODE)
	*num |= 0x4;
      if (flags & SECTION_MERGE)
	*num |= 0x10;
      if (flags & SECTION_STRINGS)
	*num |= 0x20;
      if (flags & SECTION_TLS)
	*num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;

      return true;
    }

  return false;
}
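/* Illustrative worked example, not part of the build: an ordinary
   allocated, executable pure-code section has SECTION_CODE set and
   SECTION_DEBUG clear, so *num becomes 0x20000000 | 0x2 | 0x4
   = 0x20000006, i.e. SHF_ARM_PURECODE combined with the numeric values
   of SHF_ALLOC and SHF_EXECINSTR.  */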
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
						       exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode, op0, mode, op1, mode);

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
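/* Illustrative note, not part of the build: for SImode the AEABI
   routine __aeabi_idivmod returns the quotient in r0 and the remainder
   in r1, so LIBVAL_MODE above is DImode and the two simplify_gen_subreg
   calls split that register pair back apart: the quotient at byte
   offset 0 and the remainder at byte offset GET_MODE_SIZE (SImode),
   i.e. 4.  */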
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
    case VUNSPEC_CDP: case VUNSPEC_LDC: case VUNSPEC_LDCL:
    case VUNSPEC_STC: case VUNSPEC_STCL:
    case VUNSPEC_MCR: case VUNSPEC_MRC:
      if (arm_arch4)
	return true;
      break;
    case VUNSPEC_CDP2: case VUNSPEC_LDC2: case VUNSPEC_LDC2L:
    case VUNSPEC_STC2: case VUNSPEC_STC2L:
    case VUNSPEC_MCR2: case VUNSPEC_MRC2:
      /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch5t)
	return true;
      break;
    case VUNSPEC_MCRR: case VUNSPEC_MRRC:
      /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch6 || arm_arch5te)
	return true;
      break;
    case VUNSPEC_MCRR2: case VUNSPEC_MRRC2:
      if (arm_arch6)
	return true;
      break;
    default:
      gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
    case PLUS:
      {
	/* Or registers with an offset.  */
	if (!REG_P (XEXP (op, 0)))
	  return false;

	op = XEXP (op, 1);

	/* The offset must be an immediate though.  */
	if (!CONST_INT_P (op))
	  return false;

	range = INTVAL (op);

	/* Within the range of [-1020,1020].  */
	if (!IN_RANGE (range, -1020, 1020))
	  return false;

	/* And a multiple of 4.  */
	return (range % 4) == 0;
      }
    case PRE_INC:
    case POST_INC:
    case PRE_DEC:
    case POST_DEC:
      return REG_P (XEXP (op, 0));
    default:
      gcc_unreachable ();
    }
  return false;
}
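/* Illustrative sketch, not part of the build: under the checks above,
   ldc/stc addresses such as [r0], [r0, #8] and [r0, #-1020] are
   legitimate, while [r0, #2] fails the multiple-of-4 test and
   [r0, #1024] falls outside [-1020, 1020].  */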
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
arm_invalid_conversion (const_tree fromtype, const_tree totype)
{
  if (element_mode (fromtype) != element_mode (totype))
    {
      /* Do not allow conversions to/from BFmode scalar types.  */
      if (TYPE_MODE (fromtype) == BFmode)
	return N_("invalid conversion from type %<bfloat16_t%>");
      if (TYPE_MODE (totype) == BFmode)
	return N_("invalid conversion to type %<bfloat16_t%>");
    }

  /* Conversion allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
arm_invalid_unary_op (int op, const_tree type)
{
  /* Reject all single-operand operations on BFmode except for &.  */
  if (element_mode (type) == BFmode && op != ADDR_EXPR)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
		       const_tree type2)
{
  /* Reject all 2-operand operations on BFmode.  */
  if (element_mode (type1) == BFmode
      || element_mode (type2) == BFmode)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */
void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}
/* Have we recorded an explicit access to the Q bit of APSR?  */
bool
arm_q_bit_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle qbit",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}
/* Have we recorded an explicit access to the GE bits of PSTATE?  */
bool
arm_ge_bits_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle gebits",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}
/* NULL if insn INSN is valid within a low-overhead loop.
   Otherwise return why doloop cannot be applied.  */

static const char *
arm_invalid_within_doloop (const rtx_insn *insn)
{
  if (!TARGET_HAVE_LOB)
    return default_invalid_within_doloop (insn);

  if (CALL_P (insn))
    return "Function call in the loop.";

  if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
    return "LR is used inside loop.";

  return NULL;
}
static bool
arm_target_insn_ok_for_lob (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  /* Make sure the basic block of the target insn is a simple latch
     having as single predecessor and successor the body of the loop
     itself.  Only simple loops with a single basic block as body are
     supported for 'low-overhead loop', making sure that LE target is
     above LE itself in the generated code.  */

  return single_succ_p (bb)
	 && single_pred_p (bb)
	 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
	 && contains_no_active_insn_p (bb);
}
#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */

static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature
		 bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature
		 bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */

static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
      bitmap_clear (isa_all_fpubits_internal);
      bitmap_copy (isa_all_fpubits_internal, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits_internal))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
	       " group that are not defined by any FPU.\n"
	       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
    }
}
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */
/* Implement TARGET_STACK_PROTECT_GUARD.  In case of a
   global variable based guard use the default else
   return a null tree.  */
static tree
arm_stack_protect_guard (void)
{
  if (arm_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
   Unlike the arm version, we do NOT implement asm flag outputs.  */

rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		      vec<machine_mode> & /*input_modes*/,
		      vec<const char *> &constraints,
		      vec<rtx> &, vec<rtx> & /*clobbers*/,
		      HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    if (startswith (constraints[i], "=@cc"))
      {
	sorry ("%<asm%> flags not supported in thumb1 mode");
	break;
      }
  return NULL;
}
/* Generate code to enable conditional branches in functions over 1 MiB.
   Parameters are:
     operands: is the operands list of the asm insn (see arm_cond_branch or
       arm_cond_branch_reversed).
     pos_label: is an index into the operands array where operands[pos_label]
       is the asm label of the final jump destination.
     dest: is a string which is used to generate the asm label of the
       intermediate destination.
     branch_format: is a string denoting the intermediate branch format,
       e.g. "beq", "bne", etc.  */

const char *
arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
		    const char * branch_format)
{
  rtx_code_label *tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];

  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = arm_strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);

  return "";
}
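/* Illustrative sketch, not part of the build: callers pass the
   *reversed* condition as BRANCH_FORMAT, so a far "beq .Lfar" comes out
   roughly as

	bne	.Lbcond4	@ short hop over the far branch
	b	.Lfar		@ unconditional branch, much larger range
     .Lbcond4:

   where the exact ".Lbcond4" spelling comes from
   ASM_GENERATE_INTERNAL_LABEL and the label number is hypothetical.  */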
/* If given mode matches, load from memory to LO_REGS.
   (i.e. [Rn], Rn <= LO_REGS).  */
enum reg_class
arm_mode_base_reg_class (machine_mode mode)
{
  if (TARGET_HAVE_MVE
      && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
    return LO_REGS;

  return MODE_BASE_REG_REG_CLASS (mode);
}
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
arm_get_mask_mode (machine_mode mode)
{
  if (TARGET_HAVE_MVE)
    return arm_mode_to_pred_mode (mode);

  return default_get_mask_mode (mode);
}
/* Output assembly to read the thread pointer from the appropriate TPIDR
   register into DEST.  If PRED_P also emit the %? that can be used to
   output the predication code.  */

const char *
arm_output_load_tpidr (rtx dst, bool pred_p)
{
  char buf[64];
  int tpidr_coproc_num = -1;
  switch (target_thread_pointer)
    {
    case TP_TPIDRURW:
      tpidr_coproc_num = 2;
      break;
    case TP_TPIDRURO:
      tpidr_coproc_num = 3;
      break;
    case TP_TPIDRPRW:
      tpidr_coproc_num = 4;
      break;
    default:
      gcc_unreachable ();
    }
  snprintf (buf, sizeof (buf),
	    "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
	    pred_p ? "%?" : "", tpidr_coproc_num);
  output_asm_insn (buf, &dst);
  return "";
}
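/* Illustrative sketch, not part of the build: assuming the TP_TPIDR*
   enumerators above, selecting the read-only user thread ID register
   (coprocessor operand 3) makes this function emit

	mrc	p15, 0, r0, c13, c0, 3	@ load_tp_hard

   for a destination of r0.  */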
34748 #include "gt-arm.h"