/* Output routines for GCC for ARM.
   Copyright (C) 1991-2023 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"
#include "tree-vectorizer.h"
#include "aarch-common.h"
#include "aarch-common-protos.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static void arm_emit_multi_reg_pop (unsigned long);
static int vfp_emit_fstmd (int, int);
static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif

static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *, struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
					  rtx, const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
				       vec<machine_mode> &,
				       vec<const char *> &, vec<rtx> &,
				       HARD_REG_SET &, location_t);
static const char *arm_identify_fpu_from_isa (sbitmap);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, false, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
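/* Worked example of the spacing arithmetic above: each anchor covers
   offsets in [-4088, +4095], i.e. 4088 + 1 + 4095 = 8184 bytes in all,
   and 8184 == 8 * 1023, so consecutive anchors stay a multiple of eight
   bytes apart.  */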
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
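/* Illustrative sketch (not part of this file): with the default limit
   of four, a Thumb-2 conditional sequence such as
	itte	ne
	addne	r0, r0, #1
	subne	r2, r2, r0
	moveq	r0, #0
   fits in a single IT block, whereas with -mrestrict-it (limit 1) each
   conditional instruction needs its own IT instruction.  */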
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
/* True if we are currently building a constant table.  */
int making_const_table;
/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;
/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */
struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;

/* Nonzero if this chip supports the ARM Architecture 8-M Mainline
   extensions.  */
int arm_arch8m_main = 0;

/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   extensions.  */
int arm_arch8_1m_main = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;
/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* Nonzero if chip supports the Custom Datapath Extension.  */
int arm_arch_cde = 0;
int arm_arch_cde_coproc = 0;
const int arm_arch_cde_coproc_bits[] = {
  0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
};
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] = {
  FP_SYSREGS
};
#undef DEF_FP_SYSREG
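/* How the X-macro above works: FP_SYSREGS is expected to be a list of
   DEF_FP_SYSREG invocations, e.g. (hypothetically)
	#define FP_SYSREGS  DEF_FP_SYSREG (FPSCR) DEF_FP_SYSREG (FPCXTNS)
   so with the stringizing definition of DEF_FP_SYSREG in scope the
   initializer expands to { "FPSCR", "FPCXTNS", }, keeping the name
   table in sync with the NB_FP_SYSREGS enumeration.  */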
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS					\
  (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM)		\
	    | (1 << SP_REGNUM)					\
	    | (1 << PC_REGNUM)					\
	    | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM	\
	       ? (1 << PIC_OFFSET_TABLE_REGNUM)			\
	       : 0)))
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
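/* Usage sketch with hypothetical parameters: ARM_PREFETCH_BENEFICIAL
   (4, 32768, 64) expands to the initializer { 4, 32768, 64 }, i.e. four
   prefetch slots, a 32kB L1 cache and 64-byte cache lines, while
   ARM_PREFETCH_NOT_BENEFICIAL's { 0, -1, -1 } leaves prefetch tuning
   disabled.  */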
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar_load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
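/* Note the one asymmetry in these defaults: a taken conditional branch
   costs 3 units against 1 for everything else, so the model favours
   straight-line (e.g. if-converted) vector code over branchy scalar
   code even before any width benefit is counted.  */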
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),  /* shift_reg.  */
    COSTS_N_INSNS (1),  /* arith_shift.  */
    COSTS_N_INSNS (2),  /* arith_shift_reg.  */
    COSTS_N_INSNS (1),  /* log_shift_reg.  */
    COSTS_N_INSNS (1),  /* extend.  */
    COSTS_N_INSNS (2),  /* extend_arith.  */
    COSTS_N_INSNS (1),  /* bfi.  */
    COSTS_N_INSNS (1),  /* bfx.  */
    true                /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3),  /* simple.  */
      COSTS_N_INSNS (3),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (3),  /* add.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
      COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (4),  /* extend.  */
      COSTS_N_INSNS (4),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),  /* load.  */
    COSTS_N_INSNS (2),  /* load_sign_extend.  */
    COSTS_N_INSNS (2),  /* ldrd.  */
    COSTS_N_INSNS (2),  /* ldm_1st.  */
    1,                  /* ldm_regs_per_insn_1st.  */
    2,                  /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),  /* loadf.  */
    COSTS_N_INSNS (5),  /* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (2),  /* store.  */
    COSTS_N_INSNS (2),  /* strd.  */
    COSTS_N_INSNS (2),  /* stm_1st.  */
    1,                  /* stm_regs_per_insn_1st.  */
    2,                  /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),  /* storef.  */
    COSTS_N_INSNS (1),  /* stored.  */
    COSTS_N_INSNS (1),  /* store_unaligned.  */
    COSTS_N_INSNS (1),  /* loadv.  */
    COSTS_N_INSNS (1)   /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (1),  /* fpconst.  */
      COSTS_N_INSNS (1),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24), /* div.  */
      COSTS_N_INSNS (5),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (1),  /* fpconst.  */
      COSTS_N_INSNS (1),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),  /* alu.  */
    COSTS_N_INSNS (4),  /* mult.  */
    COSTS_N_INSNS (1),  /* movi.  */
    COSTS_N_INSNS (2),  /* dup.  */
    COSTS_N_INSNS (2)   /* extract.  */
  }
};
const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),  /* shift.  */
    COSTS_N_INSNS (1),  /* arith_shift.  */
    0,                  /* arith_shift_reg.  */
    COSTS_N_INSNS (1),  /* log_shift.  */
    0,                  /* log_shift_reg.  */
    0,                  /* extend_arith.  */
    true                /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* simple.  */
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (1),  /* extend_add.  */
      COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),  /* load.  */
    COSTS_N_INSNS (1),  /* load_sign_extend.  */
    COSTS_N_INSNS (1),  /* ldrd.  */
    COSTS_N_INSNS (1),  /* ldm_1st.  */
    1,                  /* ldm_regs_per_insn_1st.  */
    2,                  /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),  /* loadf.  */
    COSTS_N_INSNS (1),  /* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (1),  /* store.  */
    COSTS_N_INSNS (1),  /* strd.  */
    COSTS_N_INSNS (1),  /* stm_1st.  */
    1,                  /* stm_regs_per_insn_1st.  */
    2,                  /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),  /* storef.  */
    COSTS_N_INSNS (1),  /* stored.  */
    COSTS_N_INSNS (1),  /* store_unaligned.  */
    COSTS_N_INSNS (1),  /* loadv.  */
    COSTS_N_INSNS (1)   /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36), /* div.  */
      COSTS_N_INSNS (11), /* mult.  */
      COSTS_N_INSNS (20), /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (9),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (6),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (8),  /* toint.  */
      COSTS_N_INSNS (8),  /* fromint.  */
      COSTS_N_INSNS (8)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64), /* div.  */
      COSTS_N_INSNS (16), /* mult.  */
      COSTS_N_INSNS (25), /* mult_addsub.  */
      COSTS_N_INSNS (30), /* fma.  */
      COSTS_N_INSNS (9),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (6),  /* compare.  */
      COSTS_N_INSNS (6),  /* widen.  */
      COSTS_N_INSNS (6),  /* narrow.  */
      COSTS_N_INSNS (8),  /* toint.  */
      COSTS_N_INSNS (8),  /* fromint.  */
      COSTS_N_INSNS (8)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),  /* alu.  */
    COSTS_N_INSNS (4),  /* mult.  */
    COSTS_N_INSNS (1),  /* movi.  */
    COSTS_N_INSNS (2),  /* dup.  */
    COSTS_N_INSNS (2)   /* extract.  */
  }
};
const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),  /* shift.  */
    COSTS_N_INSNS (1),  /* shift_reg.  */
    COSTS_N_INSNS (1),  /* arith_shift.  */
    COSTS_N_INSNS (1),  /* arith_shift_reg.  */
    COSTS_N_INSNS (1),  /* log_shift.  */
    COSTS_N_INSNS (1),  /* log_shift_reg.  */
    COSTS_N_INSNS (1),  /* extend.  */
    COSTS_N_INSNS (1),  /* extend_arith.  */
    COSTS_N_INSNS (1),  /* bfi.  */
    COSTS_N_INSNS (1),  /* bfx.  */
    COSTS_N_INSNS (1),  /* clz.  */
    COSTS_N_INSNS (1),  /* rev.  */
    true                /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (1),  /* extend_add.  */
      COSTS_N_INSNS (7)   /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),  /* load.  */
    COSTS_N_INSNS (1),  /* load_sign_extend.  */
    COSTS_N_INSNS (6),  /* ldrd.  */
    COSTS_N_INSNS (1),  /* ldm_1st.  */
    1,                  /* ldm_regs_per_insn_1st.  */
    2,                  /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),  /* loadf.  */
    COSTS_N_INSNS (4),  /* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (1),  /* store.  */
    COSTS_N_INSNS (3),  /* strd.  */
    COSTS_N_INSNS (1),  /* stm_1st.  */
    1,                  /* stm_regs_per_insn_1st.  */
    2,                  /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),  /* storef.  */
    COSTS_N_INSNS (2),  /* stored.  */
    COSTS_N_INSNS (1),  /* store_unaligned.  */
    COSTS_N_INSNS (1),  /* loadv.  */
    COSTS_N_INSNS (1)   /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15), /* div.  */
      COSTS_N_INSNS (3),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30), /* div.  */
      COSTS_N_INSNS (6),  /* mult.  */
      COSTS_N_INSNS (10), /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),  /* alu.  */
    COSTS_N_INSNS (4),  /* mult.  */
    COSTS_N_INSNS (1),  /* movi.  */
    COSTS_N_INSNS (2),  /* dup.  */
    COSTS_N_INSNS (2)   /* extract.  */
  }
};
const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),  /* shift.  */
    COSTS_N_INSNS (1),  /* shift_reg.  */
    COSTS_N_INSNS (1),  /* arith_shift.  */
    COSTS_N_INSNS (1),  /* arith_shift_reg.  */
    COSTS_N_INSNS (1),  /* log_shift.  */
    COSTS_N_INSNS (1),  /* log_shift_reg.  */
    COSTS_N_INSNS (1),  /* extend.  */
    COSTS_N_INSNS (1),  /* extend_arith.  */
    COSTS_N_INSNS (1),  /* bfi.  */
    COSTS_N_INSNS (1),  /* bfx.  */
    COSTS_N_INSNS (1),  /* clz.  */
    COSTS_N_INSNS (1),  /* rev.  */
    true                /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (1),  /* extend_add.  */
      COSTS_N_INSNS (7)   /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (1),  /* extend.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),  /* load.  */
    COSTS_N_INSNS (1),  /* load_sign_extend.  */
    COSTS_N_INSNS (3),  /* ldrd.  */
    COSTS_N_INSNS (1),  /* ldm_1st.  */
    1,                  /* ldm_regs_per_insn_1st.  */
    2,                  /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),  /* loadf.  */
    COSTS_N_INSNS (2),  /* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (1),  /* store.  */
    COSTS_N_INSNS (3),  /* strd.  */
    COSTS_N_INSNS (1),  /* stm_1st.  */
    1,                  /* stm_regs_per_insn_1st.  */
    2,                  /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),  /* storef.  */
    COSTS_N_INSNS (2),  /* stored.  */
    COSTS_N_INSNS (1),  /* store_unaligned.  */
    COSTS_N_INSNS (1),  /* loadv.  */
    COSTS_N_INSNS (1)   /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15), /* div.  */
      COSTS_N_INSNS (3),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30), /* div.  */
      COSTS_N_INSNS (6),  /* mult.  */
      COSTS_N_INSNS (10), /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
      COSTS_N_INSNS (3),  /* fpconst.  */
      COSTS_N_INSNS (3),  /* neg.  */
      COSTS_N_INSNS (3),  /* compare.  */
      COSTS_N_INSNS (3),  /* widen.  */
      COSTS_N_INSNS (3),  /* narrow.  */
      COSTS_N_INSNS (3),  /* toint.  */
      COSTS_N_INSNS (3),  /* fromint.  */
      COSTS_N_INSNS (3)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),  /* alu.  */
    COSTS_N_INSNS (4),  /* mult.  */
    COSTS_N_INSNS (1),  /* movi.  */
    COSTS_N_INSNS (2),  /* dup.  */
    COSTS_N_INSNS (2)   /* extract.  */
  }
};
const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),  /* shift_reg.  */
    COSTS_N_INSNS (1),  /* arith_shift.  */
    COSTS_N_INSNS (1),  /* arith_shift_reg.  */
    COSTS_N_INSNS (1),  /* log_shift.  */
    COSTS_N_INSNS (1),  /* log_shift_reg.  */
    COSTS_N_INSNS (1),  /* extend_arith.  */
    COSTS_N_INSNS (1),  /* bfx.  */
    COSTS_N_INSNS (1),  /* clz.  */
    COSTS_N_INSNS (1),  /* rev.  */
    true                /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),  /* simple.  */
      COSTS_N_INSNS (3),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (3),  /* add.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
      COSTS_N_INSNS (18)  /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (3),  /* extend.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),  /* load.  */
    COSTS_N_INSNS (3),  /* load_sign_extend.  */
    COSTS_N_INSNS (3),  /* ldrd.  */
    COSTS_N_INSNS (3),  /* ldm_1st.  */
    1,                  /* ldm_regs_per_insn_1st.  */
    2,                  /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),  /* loadf.  */
    COSTS_N_INSNS (3),  /* loadd.  */
    0,                  /* load_unaligned.  */
    1,                  /* stm_regs_per_insn_1st.  */
    2,                  /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),  /* storef.  */
    COSTS_N_INSNS (2),  /* stored.  */
    0,                  /* store_unaligned.  */
    COSTS_N_INSNS (1),  /* loadv.  */
    COSTS_N_INSNS (1)   /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (2),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (2),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),  /* alu.  */
    COSTS_N_INSNS (4),  /* mult.  */
    COSTS_N_INSNS (1),  /* movi.  */
    COSTS_N_INSNS (2),  /* dup.  */
    COSTS_N_INSNS (2)   /* extract.  */
  }
};
const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),  /* arith_shift.  */
    COSTS_N_INSNS (1),  /* arith_shift_reg.  */
    COSTS_N_INSNS (1),  /* log_shift.  */
    COSTS_N_INSNS (1),  /* log_shift_reg.  */
    COSTS_N_INSNS (1),  /* extend_arith.  */
    COSTS_N_INSNS (1),  /* bfi.  */
    true                /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),  /* simple.  */
      COSTS_N_INSNS (3),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (2),  /* add.  */
      COSTS_N_INSNS (2),  /* extend_add.  */
      COSTS_N_INSNS (18)  /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (3),  /* extend.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),  /* load.  */
    COSTS_N_INSNS (3),  /* load_sign_extend.  */
    COSTS_N_INSNS (3),  /* ldrd.  */
    COSTS_N_INSNS (4),  /* ldm_1st.  */
    1,                  /* ldm_regs_per_insn_1st.  */
    2,                  /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),  /* loadf.  */
    COSTS_N_INSNS (4),  /* loadd.  */
    0,                  /* load_unaligned.  */
    COSTS_N_INSNS (1),  /* stm_1st.  */
    1,                  /* stm_regs_per_insn_1st.  */
    2,                  /* stm_regs_per_insn_subsequent.  */
    0,                  /* store_unaligned.  */
    COSTS_N_INSNS (1),  /* loadv.  */
    COSTS_N_INSNS (1)   /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (5),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31), /* div.  */
      COSTS_N_INSNS (4),  /* mult.  */
      COSTS_N_INSNS (8),  /* mult_addsub.  */
      COSTS_N_INSNS (8),  /* fma.  */
      COSTS_N_INSNS (4),  /* addsub.  */
      COSTS_N_INSNS (2),  /* fpconst.  */
      COSTS_N_INSNS (2),  /* neg.  */
      COSTS_N_INSNS (2),  /* compare.  */
      COSTS_N_INSNS (4),  /* widen.  */
      COSTS_N_INSNS (4),  /* narrow.  */
      COSTS_N_INSNS (4),  /* toint.  */
      COSTS_N_INSNS (4),  /* fromint.  */
      COSTS_N_INSNS (4)   /* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),  /* alu.  */
    COSTS_N_INSNS (4),  /* mult.  */
    COSTS_N_INSNS (1),  /* movi.  */
    COSTS_N_INSNS (2),  /* dup.  */
    COSTS_N_INSNS (2)   /* extract.  */
  }
};
const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,                  /* arith_shift.  */
    COSTS_N_INSNS (1),  /* arith_shift_reg.  */
    COSTS_N_INSNS (1),  /* log_shift_reg.  */
    COSTS_N_INSNS (1),  /* extend_arith.  */
    COSTS_N_INSNS (1),  /* non_exec.  */
    false               /* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),  /* simple.  */
      COSTS_N_INSNS (1),  /* flag_setting.  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (1),  /* add.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
      COSTS_N_INSNS (8)   /* idiv.  */
    },
    /* MULT DImode */
    {
      0,                  /* simple (N/A).  */
      0,                  /* flag_setting (N/A).  */
      COSTS_N_INSNS (2),  /* extend.  */
      COSTS_N_INSNS (3),  /* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),  /* load.  */
    0,                  /* load_sign_extend.  */
    COSTS_N_INSNS (3),  /* ldrd.  */
    COSTS_N_INSNS (2),  /* ldm_1st.  */
    1,                  /* ldm_regs_per_insn_1st.  */
    1,                  /* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),  /* loadf.  */
    COSTS_N_INSNS (3),  /* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (2),  /* store.  */
    COSTS_N_INSNS (3),  /* strd.  */
    COSTS_N_INSNS (2),  /* stm_1st.  */
    1,                  /* stm_regs_per_insn_1st.  */
    1,                  /* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),  /* storef.  */
    COSTS_N_INSNS (3),  /* stored.  */
    COSTS_N_INSNS (1),  /* store_unaligned.  */
    COSTS_N_INSNS (1),  /* loadv.  */
    COSTS_N_INSNS (1)   /* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),  /* div.  */
      COSTS_N_INSNS (2),  /* mult.  */
      COSTS_N_INSNS (5),  /* mult_addsub.  */
      COSTS_N_INSNS (3),  /* fma.  */
      COSTS_N_INSNS (1),  /* addsub.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15), /* div.  */
      COSTS_N_INSNS (5),  /* mult.  */
      COSTS_N_INSNS (7),  /* mult_addsub.  */
      COSTS_N_INSNS (7),  /* fma.  */
      COSTS_N_INSNS (3),  /* addsub.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1),  /* alu.  */
    COSTS_N_INSNS (4),  /* mult.  */
    COSTS_N_INSNS (1),  /* movi.  */
    COSTS_N_INSNS (2),  /* dup.  */
    COSTS_N_INSNS (2)   /* extract.  */
  }
};
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  }
};
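/* Illustration (not part of the original file): all of the tables above are
   expressed in COSTS_N_INSNS units.  In GCC's rtl.h the macro scales an
   instruction count into cost units:

     #define COSTS_N_INSNS(N) ((N) * 4)

   so COSTS_N_INSNS (2) == 8.  These are "extra" cost tables: each entry is
   understood as the cost over and above a baseline single instruction,
   which is why so many entries are a plain 0.  */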
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};
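/* Illustration (not part of the original file): FUSE_MOVW_MOVT asks the
   scheduler to keep a constant-materialising pair adjacent, e.g.

     movw  r0, #:lower16:sym
     movt  r0, #:upper16:sym

   so cores such as Cortex-A53/A57 that fuse the pair can issue it as a
   single macro-op; FUSE_AES_AESMC does the same for AESE/AESMC sequences
   from the crypto extension.  */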
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two
   cycles to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Key type for Pointer Authentication extension.  */
enum aarch_key_type aarch_ra_sign_key = AARCH_KEY_A;

char *accepted_branch_protection_string = NULL;
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (e.g. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_GD32_FDPIC,
  TLS_LDM32,
  TLS_LDM32_FDPIC,
  TLS_LDO32,
  TLS_IE32,
  TLS_IE32_FDPIC,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
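/* Illustration (not part of the original file): the loop above is
   Kernighan's popcount; "value &= value - 1" clears exactly one set bit
   per iteration, e.g.

     0b10110 -> 0b10100 -> 0b10000 -> 0b00000   (three iterations)

   so the loop runs once per set bit rather than once per bit position.  */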
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
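/* Illustration (not part of the original file): for example,

     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);

   registers the libcall name "__gnu_addsq3".  The numeric suffix follows
   the optab convention of counting operands including the destination
   (3 for binary operations, 2 for unary ones), matching the 3/2 arguments
   used in the loops further down.  */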
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
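/* Illustration (not part of the original file; naming per fixed-bit.h): a
   conversion that stays within fixed-point modes of the same signedness
   and fract-ness, e.g. QQmode -> HQmode, gets the "2" suffix and becomes
   "__gnu_fractqqhq2", while one that leaves that domain, e.g.
   QQmode -> SFmode, does not: "__gnu_fractqqsf".  */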
static GTY(()) rtx speculation_barrier_libfunc;
/* Record that we have no arithmetic or comparison libfuncs for
   machine mode MODE.  */

static void
arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
{
  /* Arithmetic.  */
  set_optab_libfunc (add_optab, mode, NULL);
  set_optab_libfunc (sdiv_optab, mode, NULL);
  set_optab_libfunc (smul_optab, mode, NULL);
  set_optab_libfunc (neg_optab, mode, NULL);
  set_optab_libfunc (sub_optab, mode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, mode, NULL);
  set_optab_libfunc (ne_optab, mode, NULL);
  set_optab_libfunc (lt_optab, mode, NULL);
  set_optab_libfunc (le_optab, mode, NULL);
  set_optab_libfunc (ge_optab, mode, NULL);
  set_optab_libfunc (gt_optab, mode, NULL);
  set_optab_libfunc (unord_optab, mode, NULL);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  machine_mode mode_iter;

  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     general AAPCS ABI.  */
  if (arm_abi != ARM_ABI_AAPCS)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
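  /* Illustration (not part of the original file): __aeabi_idivmod returns
     the quotient in r0 and the remainder in r1.  A modulus such as "a % b"
     is therefore expanded through the divmod optab -- one call, remainder
     read from r1 -- which is why the smod/umod entries above are simply
     NULL, while a plain "a / b" uses the faster __aeabi_idiv registered
     above.  */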
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      arm_block_arith_comp_libfuncs_for_mode (HFmode);
      break;

    default:
      break;
    }
  /* For all possible libcalls in BFmode, record NULL.  */
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
    {
      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
    }
  arm_block_arith_comp_libfuncs_for_mode (BFmode);
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_QImode, "qi" },
	{ E_HImode, "hi" },
	{ E_SImode, "si" },
	{ E_DImode, "di" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
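/* Illustration (not part of the original file): the tag name really is ABI.
   The ARM C++ ABI mangles va_list as if it were std::__va_list, so a
   function "void f (va_list)" mangles to "_Z1fSt9__va_list"; renaming the
   tag here would silently change every such mangled name.  */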
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
	 MOVT.  */
      if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets.  */
      if (target_pure_code && common_unsupported_modes)
	error ("%s only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}
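/* Illustration (not part of the original file): for the Thumb-2 case above,
   anchored offsets span -248 .. 4095, so one anchor covers
   248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543 -- hence the naturally
   eight-byte-aligned anchor spacing the comment describes.  */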
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;
static void
arm_override_options_after_change_1 (struct gcc_options *opts,
				     struct gcc_options *opts_set)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options * /* opts */,
		    struct gcc_options * /* opts_set */,
		    struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, false);
  arm_option_reconfigure_globals ();
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts, opts_set);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 e.g., -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

  if (arm_stack_protector_guard == SSP_GLOBAL
      && opts->x_arm_stack_protector_guard_offset_str)
    {
      error ("incompatible options %<-mstack-protector-guard=global%> and "
	     "%<-mstack-protector-guard-offset=%s%>",
	     arm_stack_protector_guard_offset_str);
    }

  if (opts->x_arm_stack_protector_guard_offset_str)
    {
      char *end;
      const char *str = arm_stack_protector_guard_offset_str;
      errno = 0;
      long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
      if (!*str || *end || errno)
	error ("%qs is not a valid offset in %qs", str,
	       "-mstack-protector-guard-offset=");
      arm_stack_protector_guard_offset = offs;
    }
  if (arm_current_function_pac_enabled_p ())
    {
      if (!arm_arch8m_main)
	error ("This architecture does not support branch protection "
	       "instructions");
      if (TARGET_TPCS_FRAME)
	sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
    }

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;
/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (opts->x_arm_branch_protection_string)
    {
      aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);

      if (aarch_ra_sign_key != AARCH_KEY_A)
	{
	  warning (0, "invalid key type for %<-mbranch-protection=%>");
	  aarch_ra_sign_key = AARCH_KEY_A;
	}
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* If the user (or the default configuration) has specified a
	     specific FPU, then ignore any bits that depend on the FPU
	     configuration.  Do similarly if using the soft-float
	     ABI.  */
	  if (opts->x_arm_fpu_index != TARGET_FPU_auto
	      || arm_float_abi == ARM_FLOAT_ABI_SOFT)
	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with switch %<-march=%s%>",
			 opts->x_arm_cpu_string,
			 opts->x_arm_arch_string);
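	      /* Illustration (not part of the original file): this fires
		 for combinations such as -mcpu=cortex-a8 -march=armv8-a,
		 where the CPU's ISA still differs from the architecture
		 after quirk and FPU-dependent bits are masked out.  */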
3320 /* -march wins for code generation.
3321 -mcpu wins for default tuning. */
3322 if (!arm_selected_tune
)
3323 arm_selected_tune
= arm_selected_cpu
;
3325 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3326 target
->arch_name
= arm_selected_arch
->common
.name
;
3330 /* Architecture and CPU are essentially the same.
3331 Prefer the CPU setting. */
3332 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3333 target
->core_name
= arm_selected_cpu
->common
.name
;
3334 /* Copy the CPU's capabilities, so that we inherit the
3335 appropriate extensions and quirks. */
3336 bitmap_copy (target
->isa
, cpu_isa
);
3341 /* Pick a CPU based on the architecture. */
3342 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3343 target
->arch_name
= arm_selected_arch
->common
.name
;
3344 /* Note: target->core_name is left unset in this path. */
3347 else if (arm_selected_cpu
)
3349 target
->core_name
= arm_selected_cpu
->common
.name
;
3350 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3351 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3353 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3355 /* If the user did not specify a processor or architecture, choose
3359 const cpu_option
*sel
;
3360 auto_sbitmap
sought_isa (isa_num_bits
);
3361 bitmap_clear (sought_isa
);
3362 auto_sbitmap
default_isa (isa_num_bits
);
3364 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3365 TARGET_CPU_DEFAULT
);
3366 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3367 gcc_assert (arm_selected_cpu
->common
.name
);
3369 /* RWE: All of the selection logic below (to the end of this
3370 'if' clause) looks somewhat suspect. It appears to be mostly
3371 there to support forcing thumb support when the default CPU
3372 does not have thumb (somewhat dubious in terms of what the
3373 user might be expecting). I think it should be removed once
3374 support for the pre-thumb era cores is removed. */
3375 sel
= arm_selected_cpu
;
3376 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3377 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3380 /* Now check to see if the user has specified any command line
3381 switches that require certain abilities from the cpu. */
3383 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3384 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3386 /* If there are such requirements and the default CPU does not
3387 satisfy them, we need to run over the complete list of
3388 cores looking for one that is satisfactory. */
3389 if (!bitmap_empty_p (sought_isa
)
3390 && !bitmap_subset_p (sought_isa
, default_isa
))
3392 auto_sbitmap
candidate_isa (isa_num_bits
);
3393 /* We're only interested in a CPU with at least the
3394 capabilities of the default CPU and the required
3395 additional features. */
3396 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3398 /* Try to locate a CPU type that supports all of the abilities
3399 of the default CPU, plus the extra abilities requested by
3401 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3403 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3404 /* An exact match? */
3405 if (bitmap_equal_p (default_isa
, candidate_isa
))
3409 if (sel
->common
.name
== NULL
)
3411 unsigned current_bit_count
= isa_num_bits
;
3412 const cpu_option
*best_fit
= NULL
;
3414 /* Ideally we would like to issue an error message here
3415 saying that it was not possible to find a CPU compatible
3416 with the default CPU, but which also supports the command
3417 line options specified by the programmer, and so they
3418 ought to use the -mcpu=<name> command line option to
3419 override the default CPU type.
3421 If we cannot find a CPU that has exactly the
3422 characteristics of the default CPU and the given
3423 command line options we scan the array again looking
3424 for a best match. The best match must have at least
3425 the capabilities of the perfect match. */
3426 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3428 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3430 if (bitmap_subset_p (default_isa
, candidate_isa
))
3434 bitmap_and_compl (candidate_isa
, candidate_isa
,
3436 count
= bitmap_popcount (candidate_isa
);
3438 if (count
< current_bit_count
)
3441 current_bit_count
= count
;
3445 gcc_assert (best_fit
);
3449 arm_selected_cpu
= sel
;
3452 /* Now we know the CPU, we can finally initialize the target
3454 target
->core_name
= arm_selected_cpu
->common
.name
;
3455 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3456 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3458 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3461 gcc_assert (arm_selected_cpu
);
3462 gcc_assert (arm_selected_arch
);
  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      /* This should clear out ALL bits relating to the FPU/simd
	 extensions, to avoid potentially invalid combinations later on
	 that we can't match.  At present we only clear out those bits
	 that can be set by -mfpu.  This should be fixed in GCC-12.  */
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  /* If we have the soft-float ABI, clear any feature bits relating to use of
     floating-point operations.  They'll just confuse things later on.  */
  if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
    bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
  /* There may be implied bits which we still need to enable.  These are
     non-named features which are needed to complete other sets of features,
     but cannot be enabled from arm-cpus.in due to being shared between
     multiple fgroups.  Each entry in all_implied_fbits is of the form
     ante -> cons, meaning that if the feature "ante" is enabled, we should
     implicitly enable "cons".  */
  const struct fbit_implication *impl = all_implied_fbits;
  while (impl->ante)
    {
      if (bitmap_bit_p (target->isa, impl->ante))
	bitmap_set_bit (target->isa, impl->cons);
      impl++;
    }
  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  if (!target->arch_name)
    target->arch_name = arm_selected_arch->common.name;
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
}
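
/* An illustrative sketch (not part of GCC): the "best fit" scan above
   selects the core whose ISA is a superset of the required feature set
   while carrying the fewest surplus feature bits.  Over plain words
   instead of sbitmaps, the same selection logic would be:

     int best = -1, best_extra = INT_MAX;
     for (int i = 0; cores[i].name != NULL; i++)
       if ((cores[i].isa & required) == required)
	 {
	   int extra = __builtin_popcount (cores[i].isa & ~required);
	   if (extra < best_extra)
	     {
	       best = i;
	       best_extra = extra;
	     }
	 }

   where `cores' and `required' stand in for all_cores and the
   default_isa | sought_isa set computed above.  */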
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!OPTION_SET_P (arm_fpu_index))
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options, &global_options_set);
  arm_configure_build_target (&arm_active_target, &opts, true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;
  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!OPTION_SET_P (arm_structure_size_boundary))
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (TARGET_VXWORKS_RTP)
    {
      if (!OPTION_SET_P (arm_pic_data_is_text_relative))
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  if (TARGET_FDPIC)
    target_word_relocations = 1;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable fix_vlldm by default if required.  */
  if (fix_vlldm == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
	fix_vlldm = 1;
      else
	fix_vlldm = 0;
    }

  /* Enable fix_aes by default if required.  */
  if (fix_aes_erratum_1742098 == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
	fix_aes_erratum_1742098 = 1;
      else
	fix_aes_erratum_1742098 = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_gcse_unrestricted_cost, 2);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_simultaneous_prefetches,
			 current_tune->prefetch.num_slots);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_line_size,
			 current_tune->prefetch.l1_cache_line_size);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    {
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
    }
  else
    {
      /* For a generic ARM target, JF Bastien proposed using 64 for both.  */
      /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
	 constructive?  */
      /* More recent Cortex chips have a 64-byte cache line, but are marked
	 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size, 64);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size, 64);
    }

  if (current_tune->prefetch.l1_cache_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_size,
			 current_tune->prefetch.l1_cache_size);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_sched_autopref_queue_depth,
		       sched_autopref_queue_depth);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options,
					   &global_options_set);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options, &global_options_set);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
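
/* A note on the SET_OPTION_IF_UNSET calls used throughout the function
   above: the macro installs a backend default only when the user has not
   set the option explicitly.  Schematically (a simplification of the real
   macro, which operates on the opts/opts_set pair):

     if (!opts_set->x_param_name)	// untouched on the command line?
       opts->x_param_name = value;	// then apply the tuning default

   so, for example, --param l1-cache-line-size=32 on the command line
   always wins over the per-core prefetch tuning tables consulted here.  */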
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa,
					  isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
				    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch8m_main = arm_arch7 && arm_arch_cmse;
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  arm_arch_cde = 0;
  arm_arch_cde_coproc = 0;
  int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
		    isa_bit_cdecp6, isa_bit_cdecp7};
  for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
    {
      int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
      if (cde_bit)
	{
	  arm_arch_cde |= cde_bit;
	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
	}
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
    error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
}
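
/* Illustrative sketch (not the real sbitmap implementation): each of the
   arm_arch* booleans initialized above caches a single-bit membership
   query against the active target's ISA set.  Over a flat word array the
   query amounts to:

     static inline bool isa_bit_set (const unsigned long *isa, unsigned bit)
     {
       unsigned w = bit / (CHAR_BIT * sizeof (unsigned long));
       unsigned b = bit % (CHAR_BIT * sizeof (unsigned long));
       return (isa[w] >> b) & 1;
     }

   Caching the results in plain globals keeps the predicates used by
   arm.md cheap on hot paths.  */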
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("%<__fp16%> and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
	    error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
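
/* Usage example: the table above is what makes

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   map to ARM_FT_ISR via the "IRQ" entry, while "FIQ" selects ARM_FT_FIQ
   and "UNDEF"/"SWI" select ARM_FT_EXCEPTION.  arm_isr_value below performs
   the lookup.  */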
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address  */
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  maybe_emit_call_builtin___clear_cache (a_tramp,
					 plus_constant (ptr_mode,
							a_tramp,
							TRAMPOLINE_SIZE));
}
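
/* For reference, the FDPIC trampoline initialized above ends up with this
   word layout (byte offsets into m_tramp), matching the adjust_address
   calls in the TARGET_FDPIC arm:

     0:      address of the trampoline code (initial function descriptor)
     4:      PIC register value (GOT address) at trampoline creation
     8..19:  the three ldr instructions from arm_asm_trampoline_template
     20:     static chain value
     24:     GOT address of the target function
     28:     entry point of the target function  */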
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if REG needs to be saved.  For interrupt handlers, this
   includes call-clobbered registers too.  If this is a leaf function
   we can just examine the registers used by the RTL, but otherwise we
   have to assume that whatever function is called might clobber
   anything, and so we have to save all the call-clobbered registers
   as well.  */
static inline bool reg_needs_saving_p (unsigned reg)
{
  unsigned long func_type = arm_current_func_type ();

  if (IS_INTERRUPT (func_type))
    if (df_regs_ever_live_p (reg)
	/* Save call-clobbered core registers.  */
	|| (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)
	    && reg < FIRST_VFP_REGNUM))
      return true;
    else
      return false;
  else
    if (!df_regs_ever_live_p (reg)
	|| call_used_or_fixed_reg_p (reg))
      return false;
    else
      return true;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  /* Never use a return instruction when return address signing
     mechanism is enabled as it requires more than one
     instruction.  */
  if (arm_current_function_pac_enabled_p ())
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry functions need to use bxns to return and thus
     need several instructions if anything needs to be popped.  Armv8.1-M
     Mainline also needs several instructions to save and restore FP
     context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_VFP_BASE)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
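
/* Illustrative sketch (not part of GCC): in ARM mode the test above
   accepts exactly the values expressible as an 8-bit constant rotated
   right by an even amount.  A standalone checker over plain 32-bit
   unsigned arithmetic would be:

     static int valid_arm_immediate (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
	 {
	   // Rotate X left by ROT; if the result fits in 8 bits, then X
	   // is that 8-bit value rotated right by ROT.
	   unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
	   if ((v & ~0xffu) == 0)
	     return 1;
	 }
       return 0;
     }

   For example 0x0000ff00 and 0xff000000 are valid single-instruction
   immediates, while 0x00ff00ff is not.  */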
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
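
/* Worked example (illustrative): for an AND in DImode with the constant
   0xFFFFFFFF000000FF, hi_val is 0xFFFFFFFF (the high word is left
   untouched by the AND) and lo_val is 0xFF (a valid ARM immediate), so
   the predicate returns true and the DImode operation can be split into
   two SImode ANDs.  */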
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c  */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));
	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
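
/* Illustrative example: on a target with MOVW/MOVT (TARGET_USE_MOVT),
   the SET path above materializes an arbitrary 32-bit constant in two
   instructions via arm_emit_movpair, e.g. for 0x12345678:

     movw	rA, #0x5678	@ low halfword
     movt	rA, #0x1234	@ high halfword

   which bounds the cost of any expensive SET at two instructions on
   such targets.  */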
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
					 best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it
		 will leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
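
/* Worked example (illustrative): in ARM mode the value 0xe0000100 is
   synthesized by the routine above as two rotated 8-bit immediates,

     mov	rA, #0xe0000000	@ 0x0e rotated right by 4
     add	rA, rA, #0x100	@ 0x01 rotated right by 24

   (the second insn becomes an add because CODE flips from SET to PLUS
   after the first chunk).  Starting the scan just below the largest
   2-bit-aligned block of zeros is what leaves the small 0x100 chunk for
   the final insn, where it has a chance to fold into a later addressing
   mode.  */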
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode,
							    GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn
		    (cond,
		     gen_ashrsi3 (target, new_src,
				  GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn
		    (cond,
		     gen_ashrsi3 (target, new_src,
				  GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR (mode,
						 gen_rtx_ASHIFT (mode, source,
								 GEN_INT (i)),
						 source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source,
								   sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /* Convert.
	 x = y | constant (which is composed of set_sign_bit_copies of
			   leading 1s and the remainder 0s, e.g. 0xfff00000)
	 to
	 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	 This can be done in 2 instructions by using shifts with mov or mvn.
	 e.g. for
	 x = x | 0xfff00000;
	 we generate.
	 mvn	r0, r0, asl #12
	 mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	 x = y | constant (which has set_zero_bit_copies number of trailing
			   ones)
	 to
	 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	 For eg. r0 = r0 | 0xfff
	      mvn	r0, r0, lsr #12
	      mvn	r0, r0, asl #12  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.  */
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}
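/* Illustrative example: for a DImode op of 0x000000ff000000ff both halves
   equal 0xff, a valid ARM immediate, so RSB/RSC is preferred; for
   0x12345678ffffffff the high word is not encodable and the function
   returns false.  */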
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */
static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
     ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
     either reversed or (for constant OP1) adjusted to GE/LT.
     Similarly for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || *code == GTU || *code == LEU)
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval)
		    {
		      /* Try to convert to GE/LT, unless that would be more
			 expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }

		  /* GT maxval is always false, LE maxval is always true.
		     We can't fold that away here as we must make a
		     comparison, but we can fold them to comparisons
		     with the same result that can be handled:
		       op0 GT maxval -> op0 LT minval
		       op0 LE maxval -> op0 GE minval
		     where minval = (-maxval - 1).  */
		  *op1 = GEN_INT (-maxval - 1);
		  *code = *code == GT ? LT : GE;
		  return;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0))
		    {
		      /* Try to convert to GEU/LTU, unless that would
			 be more expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }

		  /* GTU ~0 is always false, LEU ~0 is always true.
		     We can't fold that away here as we must make a
		     comparison, but we can fold them to comparisons
		     with the same result that can be handled:
		       op0 GTU ~0 -> op0 LTU 0
		       op0 LEU ~0 -> op0 GEU 0.  */
		  *op1 = const0_rtx;
		  *code = *code == GTU ? LTU : GEU;
		  return;

		default:
		  gcc_unreachable ();
		}
	    }

	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      break;
    }
}
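/* Illustrative example: "x > 0xffffff" (GT) uses an immediate that no ARM
   data-processing instruction can encode, but 0x1000000 can be, so the
   comparison is rewritten as "x >= 0x1000000" (GE).  Likewise "x <=u ~0"
   is always true and becomes "x >=u 0".  */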
/* Define how to find the value returned by a function.  */
static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
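/* Illustrative example: on a big-endian AAPCS target, a 6-byte struct
   returned in registers is rounded up to 8 bytes here, so the value comes
   back in an 8-byte integer mode with the data in the most significant
   end, as arm_return_in_msb requires.  */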
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers except in case of MVE, because in
	 MVE we will be using the hard-float ABI on a CPU which doesn't support
	 single-precision operations in hardware.  In MVE the following check
	 enables use of emulation for the single-precision arithmetic
	 operations.  */
      if (TARGET_HAVE_MVE)
	{
	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
	}
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type)
      && (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

#ifndef ARM_WINCE
  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */

  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      /* NOTE: This code is deprecated and has not been updated to handle
	 DECL_FIELD_ABI_IGNORED.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
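/* Illustrative examples under AAPCS: "struct { char c; }" and
   "struct { int i; }" fit in one word and are returned in r0, a 20-byte
   struct is returned in memory, and a 16-byte vector is still returned
   in r0-r3.  */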
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
    /* We could recognize these, but changes would be needed elsewhere
       to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS string.  */
  return ARM_PCS_UNKNOWN;
}
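/* Example use from C source (illustrative):
     double f (double) __attribute__ ((pcs ("aapcs")));
   selects the base (integer-register) argument-passing variant for f even
   when the default is aapcs-vfp.  */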
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
#if 0
      /* Unfortunately, this is not safe and can lead to wrong code
	 being generated (PR96882).  Not all calls into the back-end
	 pass the DECL, so it is unsafe to make any PCS-changing
	 decisions based on it.  In particular the RETURN_IN_MEMORY
	 hook is only ever passed a TYPE.  This needs revisiting to
	 see if there are any partial improvements that can be
	 re-enabled.  */
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_node *local_info_node
	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
	  if (local_info_node && local_info_node->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
#endif
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Bitmasks that indicate whether earlier versions of GCC would have
   taken a different path through the ABI logic.  This should result in
   a -Wpsabi warning if the earlier path led to a different ABI decision.

   WARN_PSABI_EMPTY_CXX17_BASE
      Indicates that the type includes an artificial empty C++17 base field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  See PR94711 for details.

   WARN_PSABI_NO_UNIQUE_ADDRESS
      Indicates that the type includes an empty [[no_unique_address]] field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  */
const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
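/* Illustrative example of the empty-base case (WARN_PSABI_EMPTY_CXX17_BASE):
     struct empty {};
     struct hfa : empty { double x, y; };
   "hfa" is a homogeneous aggregate of two doubles; before GCC 10.1 the
   artificial base field stopped it from being treated as one.  */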
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.

   The WARN_PSABI_FLAGS argument allows the caller to check whether this
   function has changed its behavior relative to earlier versions of GCC.
   Normally the argument should be nonnull and point to a zero-initialized
   variable.  The function then records whether the ABI decision might
   be affected by a known fix to the ABI logic, setting the associated
   WARN_PSABI_* bits if so.

   When the argument is instead a null pointer, the function tries to
   simulate the behavior of GCC before all such ABI fixes were made.
   This is useful to check whether the function returns something
   different after the ABI fixes.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
			 unsigned int *warn_psabi_flags)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
					 warn_psabi_flags);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    if (DECL_FIELD_ABI_IGNORED (field))
	      {
		/* See whether this is something that earlier versions of
		   GCC failed to ignore.  */
		unsigned int flag;
		if (lookup_attribute ("no_unique_address",
				      DECL_ATTRIBUTES (field)))
		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
		else if (cxx17_empty_base_field_p (field))
		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
		else
		  /* No compatibility problem.  */
		  continue;

		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
		if (warn_psabi_flags)
		  {
		    *warn_psabi_flags |= flag;
		    continue;
		  }
	      }
	    /* A zero-width bitfield may affect layout in some
	       circumstances, but adds no members.  The determination
	       of whether or not a type is an HFA is performed after
	       layout is complete, so if the type still looks like an
	       HFA afterwards, it is still classed as one.  This is
	       potentially an ABI break for the hard-float ABI.  */
	    else if (DECL_BIT_FIELD (field)
		     && integer_zerop (DECL_SIZE (field)))
	      {
		/* Prior to GCC-12 these fields were stripped early,
		   hiding them from the back-end entirely and
		   resulting in the correct behaviour for argument
		   passing.  Simulate that old behaviour without
		   generating a warning.  */
		if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
		  continue;
		if (warn_psabi_flags)
		  *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
	      }

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
						 warn_psabi_flags);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 %<hard-float%> VFP ABI");
	  /* sorry () is not immediately fatal, so only display this
	     once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT
	  && (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      unsigned int warn_psabi_flags = 0;
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
					      &warn_psabi_flags);
      if (ag_count > 0 && ag_count <= 4)
	{
	  static unsigned last_reported_type_uid;
	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
	  int alt;
	  if (warn_psabi
	      && warn_psabi_flags
	      && uid != last_reported_type_uid
	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
		  != ag_count))
	    {
	      const char *url10
		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
	      const char *url12
		= CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
	      gcc_assert (alt == -1);
	      last_reported_type_uid = uid;
	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
		 qualification.  */
	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
		inform (input_location, "parameter passing for argument of "
			"type %qT with %<[[no_unique_address]]%> members "
			"changed %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
		inform (input_location, "parameter passing for argument of "
			"type %qT when C++17 is enabled changed to match "
			"C++14 %{in GCC 10.1%}",
			TYPE_MAIN_VARIANT (type), url10);
	      else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
		inform (input_location, "parameter passing for argument of "
			"type %qT changed %{in GCC 12.1%}",
			TYPE_MAIN_VARIANT (type), url12);
	    }
	  *count = ag_count;
	}
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
	   type);

  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!(TARGET_NEON || TARGET_HAVE_MVE))
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}

      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode)
     can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.
     This routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2 (mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
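/* Illustrative example: for f (int a, long long b), "a" is allocated r0
   (rule C4, ncrn 0 -> 1); "b" requires doubleword alignment, so rule C3
   rounds the NCRN up from 1 to 2 and "b" occupies r2-r3.  */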
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.

     Note that we explicitly consider zero-sized fields here, even though
     they don't map to AAPCS machine types.  For example, in:

       struct __attribute__((aligned(8))) empty {};

       struct s {
	 [[no_unique_address]] empty e;
	 int x;
       };

     "s" contains only one Fundamental Data Type (the int field)
     but gains 8-byte alignment and size thanks to "e".  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 2;

  if (ret2)
    return 2;

  return ret;
}
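/* Illustrative example: "long long" and "double" are 8-byte aligned, so
   this returns 1 and such arguments start in an even register pair (r0/r1
   or r2/r3) or at an 8-byte aligned stack slot; plain "int" returns 0.  */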
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */
static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over argument ARG.  */
static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */
static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      else if (TARGET_VFP_BASE)
	warning (OPT_Wattributes, "FP registers might be clobbered despite "
		 "%qE attribute: compile with %<-mgeneral-regs-only%>",
		 name);
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    warning (OPT_Wattributes, "%qE attribute ignored",
		     name);
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */
static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }

  return false;
}
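/* Illustrative example: with -mcmse,
     int __attribute__ ((cmse_nonsecure_entry)) f (int, int, int, int, int);
   is rejected here because the fifth argument cannot go in r0-r3 and would
   have to be passed on the (non-secure) stack.  */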
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */
static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree args ATTRIBUTE_UNUSED,
				 int flags ATTRIBUTE_UNUSED,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */
static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE;
  tree fntype, type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option", name);
      return NULL_TREE;
    }

  if (DECL_P (*node))
    {
      fntype = TREE_TYPE (*node);

      if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
	decl = *node;
    }
  else
    fntype = *node;

  while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  if (decl)
    {
      type = build_distinct_type_copy (TREE_TYPE (decl));
      TREE_TYPE (decl) = type;
    }
  else
    {
      type = build_distinct_type_copy (*node);
      *node = type;
    }

  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  tree attrs1 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type1));
  tree attrs2 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type2));
  if (bool (attrs1) != bool (attrs2))
    return 0;
  if (attrs1 && !attribute_value_equal (attrs1, attrs2))
    return 0;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same
	 attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
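/* Illustrative example: a "void (*) (void)" type carrying long_call is
   incompatible (returns 0) with one carrying short_call, while two
   function types that both lack call attributes compare as compatible
   (returns 1).  */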
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */

static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
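/* Illustrative example (editor's addition): given user code such as

     #pragma long_calls
     void far_func (void);
     #pragma long_calls_off

   arm_pragma_long_calls is LONG while far_func's type is being laid out,
   so the function type picks up the "long_call" attribute here, exactly
   as if the declaration had carried __attribute__ ((long_call)).  */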
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  if (TARGET_FDPIC)
    {
      /* In FDPIC, never tailcall something for which we have no decl:
	 the target function could be in a different module, requiring
	 a different FDPIC register value.  */
      if (decl == NULL)
	return false;
    }

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers.  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    {
	      function_arg_info arg (type, /*named=*/true);
	      arm_function_arg_advance (cum_v, arg);
	    }
	}

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (SYMBOL_REF_P (x)
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
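/* For illustration (editor's addition): both of these forms fail the test
   above and so are not legitimate immediates under PIC:

     (symbol_ref "x")
     (const (plus (symbol_ref "x") (const_int 4)))

   since a raw symbol address is not a link-time constant in a
   position-independent image; plain integers and label differences
   remain acceptable.  */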
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already
   done so.  COMPUTE_NOW decides whether and where to set the PIC register.
   If true, the PIC register is reloaded in the current position of the
   instruction stream regardless of whether it was loaded before.  Otherwise,
   it is only loaded if not already done so (crtl->uses_pic_offset_table is
   null).  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is true
   and null PIC_REG is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !compute_now
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}
/* Generate insns to calculate the address of ORIG in pic mode.  */

static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by register
   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
   location in the instruction stream, regardless of whether it was loaded
   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.

   Returns the register REG into which the PIC load is performed.  */

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
			bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (SYMBOL_REF_P (orig)
      || LABEL_REF_P (orig))
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((LABEL_REF_P (orig)
	   || (SYMBOL_REF_P (orig)
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
	       && (!SYMBOL_REF_FUNCTION_P (orig)
		   || arm_fdpic_local_funcdesc_p (orig))))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register (pic_reg, compute_now);

	  if (pic_reg == NULL_RTX)
	    pic_reg = cfun->machine->pic_reg;

	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
				     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg, pic_reg,
				       compute_now);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
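/* Worked example (editor's addition): for ORIG =
   (const (plus (symbol_ref "x") (const_int 8))), the PLUS is decomposed
   above: the symbol_ref is legitimized through the GOT into REG, the
   const_int survives the recursive call unchanged, and since 8 is a valid
   index the result folds back into (plus REG (const_int 8)) via
   plus_constant, avoiding a separate addition instruction.  */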
/* Generate insns that produce the address of the stack canary.  */

static rtx
arm_stack_protect_tls_canary_mem (bool reload)
{
  rtx tp = gen_reg_rtx (SImode);
  if (reload)
    emit_insn (gen_reload_tp_hard (tp));
  else
    emit_insn (gen_load_tp_hard (tp));

  rtx reg = gen_reg_rtx (SImode);
  rtx offset = GEN_INT (arm_stack_protector_guard_offset);
  emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
  return gen_rtx_MEM (SImode, reg);
}
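/* Sketch of the expansion (editor's addition, assuming the user selected
   -mstack-protector-guard=tls with -mstack-protector-guard-offset=N): the
   code above reads the thread pointer into a pseudo TP, computes TP + N
   into REG, and returns the memory reference

     canary == *(TP + N)

   so the canary lives at a fixed offset inside the thread control block
   rather than in a global variable.  */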
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}

static GTY(()) int pic_labelno;
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
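/* For reference (editor's addition): in the common non-VxWorks case the
   sequence emitted above assembles to something like

       ldr   rPIC, .LPICn      @ _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8)
     .LPICm:
       add   rPIC, pc, rPIC

   where the 8 (ARM) or 4 (Thumb) added to the UNSPEC_PIC_LABEL accounts
   for the PC read-ahead at the point of the addition.  */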
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (LABEL_REF_P (orig))
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = (TREE_CODE (decl) == VAR_DECL)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = TREE_CODE (decl) == VAR_DECL
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
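/* For illustration (editor's addition): an expression such as

     (minus (symbol_ref "sym") (label_ref L))

   satisfies pcrel_constant_p: it reduces to a fixed distance at link time
   and becomes a PC-relative relocation, so it need not be routed through
   the GOT when generating PIC.  */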
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;
  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  /* If we are dealing with an MVE predicate mode, then treat it as HImode,
     as we can store and load it like any other 16-bit value.  */
  if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
    mode = HImode;

  if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
    return mve_vector_mem_operand (mode, x, strict_p);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode
	   || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
	   || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_VFP_BASE
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && SYMBOL_REF_P (x)
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
	   && !arm_disable_literal_pool)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (LABEL_REF_P (x)
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && SYMBOL_REF_P (x)
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && !arm_disable_literal_pool
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
bool
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
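/* Summary of the ranges accepted above (editor's addition): byte accesses
   allow offsets 0-31; halfwords allow 0-62 in steps of 2; word-or-larger
   accesses allow 0-(128 - size) in steps of 4, so for SImode the largest
   accepted offset is 124.  These match the 5-bit scaled immediate fields
   of the 16-bit Thumb load/store encodings.  */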
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg
	    = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
							  initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme.  */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode,
				   gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),
				  UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
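/* Illustrative mapping (editor's addition): for a user variable

     __thread int counter;

   an access compiled with -ftls-model=global-dynamic takes the TLS_GD32
   path above (a __tls_get_addr call, or a descriptor sequence under
   -mtls-dialect=gnu2), while -ftls-model=local-exec reduces to the thread
   pointer plus a TLS_LE32 offset with no call at all.  */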
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (!SYMBOL_REF_P (x))
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (TARGET_THUMB1)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
	 only use an 8-bit index.  So let's use a 12-bit index for
	 SImode only and hope that arm_gen_constant will enable LDRB
	 to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the
	     base with more bits set and use a negative index instead.
	     Don't do this for Thumb as negative offsets are much more
	     limited.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
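/* Worked example (editor's addition): for an SImode load from the absolute
   address 0x12345 with optimization enabled, bits = 12 gives mask = 0xfff,
   so base = 0x12000 is forced into a register and the access becomes
   base_reg + 0x345; neighbouring absolute accesses within the same 4K
   window can then reuse the same base register.  */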
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
	  /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
	     we build the symbol address with upper/lower
	     relocations.  */
	  || (TARGET_THUMB1
	      && !label_mentioned_p (x)
	      && arm_valid_symbolic_address_p (x)
	      && arm_disable_literal_pool)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }

  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))

static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return arm_disable_literal_pool
	    ? COSTS_N_INSNS (8)
	    : COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 its mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	  /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	  /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;
    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4 bytes long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return arm_disable_literal_pool
	    ? COSTS_N_INSNS (8)
	    : COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case E_QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
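/* Illustrative example (not from the original sources): for
   (mult:SI (reg:SI 1) (const_int 4)) this returns (reg:SI 1), since a
   multiply by 4 is a shifter operand equivalent to LSL #2; for
   (ashift:SI (reg:SI 1) (reg:SI 2)) it returns (reg:SI 1) and sets
   *SHIFT_REG to (reg:SI 2) because the shift amount is not constant.  */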
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      break;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      break;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
      break;

    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
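/* Worked example (illustrative): LIBCALL_COST (2) expands to
   COSTS_N_INSNS (2 + 18) == COSTS_N_INSNS (20) when optimizing for
   speed, but only COSTS_N_INSNS (4) at -Os, so library calls are
   penalized much more heavily in hot code.  */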
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;	\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;		\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)		\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));		\
		return true;						\
	      }								\
	  }								\
	while (0)
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);
  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}
/* Helper for arm_bfi_p.  */
static bool
arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
{
  unsigned HOST_WIDE_INT const1;
  unsigned HOST_WIDE_INT const2 = 0;

  if (!CONST_INT_P (XEXP (op0, 1)))
    return false;

  const1 = UINTVAL (XEXP (op0, 1));
  if (!CONST_INT_P (XEXP (op1, 1))
      || ~UINTVAL (XEXP (op1, 1)) != const1)
    return false;

  if (GET_CODE (XEXP (op0, 0)) == ASHIFT
      && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
    {
      const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
      *sub0 = XEXP (XEXP (op0, 0), 0);
    }
  else
    *sub0 = XEXP (op0, 0);

  if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return false;

  *sub1 = XEXP (op1, 0);
  return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
}
/* Recognize a BFI idiom.  Helper for arm_rtx_costs_internal.  The
   format looks something like:

   (IOR (AND (reg1) (~const1))
	(AND (ASHIFT (reg2) (const2))
	     (const1)))

   where const1 is a consecutive sequence of 1-bits with the
   least-significant non-zero bit starting at bit position const2.  If
   const2 is zero, then the shift will not appear at all, due to
   canonicalization.  The two arms of the IOR expression may be
   commuted.  */
static bool
arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
{
  if (GET_CODE (x) != IOR)
    return false;
  if (GET_CODE (XEXP (x, 0)) != AND
      || GET_CODE (XEXP (x, 1)) != AND)
    return false;
  return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
	  || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
}
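/* Worked example (illustrative, not from the original sources): to
   insert an 8-bit field at bit position 4, const1 == 0xff0 and
   const2 == 4; the final test in arm_bfi_1_p holds because
   0xff0 + (1 << 4) == 0x1000 is a power of two, confirming that const1
   is a contiguous run of ones starting at bit const2.  */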
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
				   && GET_MODE_SIZE (mode) > 4)
				  ? 2 : 1));
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;

	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;

	  goto const_int_cost;
	}

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);

    case PARALLEL:
      {
	/* Calculations of LDM costs are complex.  We assume an initial cost
	   (ldm_1st) which will load the number of registers mentioned in
	   ldm_regs_per_insn_1st registers; then each additional
	   ldm_regs_per_insn_subsequent registers cost one more insn.  The
	   formula for N regs is thus:

	   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				     + ldm_regs_per_insn_subsequent - 1)
				    / ldm_regs_per_insn_subsequent).

	   Additional costs may also be added for addressing.  A similar
	   formula is used for STM.  */
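	/* Worked example (illustrative, with hypothetical tuning values):
	   with ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent
	   == 2, an 8-register LDM is costed at
	   ldm_1st + COSTS_N_INSNS ((8 - 3 + 2 - 1) / 2)
	   == ldm_1st + COSTS_N_INSNS (3).  */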
	bool is_ldm = load_multiple_operation (x, SImode);
	bool is_stm = store_multiple_operation (x, SImode);

	if (is_ldm || is_stm)
	  {
	    if (speed_p)
	      {
		HOST_WIDE_INT nregs = XVECLEN (x, 0);
		HOST_WIDE_INT regs_per_insn_1st = is_ldm
		  ? extra_cost->ldst.ldm_regs_per_insn_1st
		  : extra_cost->ldst.stm_regs_per_insn_1st;
		HOST_WIDE_INT regs_per_insn_sub = is_ldm
		  ? extra_cost->ldst.ldm_regs_per_insn_subsequent
		  : extra_cost->ldst.stm_regs_per_insn_subsequent;

		*cost += regs_per_insn_1st
			 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					   + regs_per_insn_sub - 1)
					  / regs_per_insn_sub);
	      }

	    return true;
	  }
      }
      return false;
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case MOD:
      /* MOD by a power of 2 can be expanded as:

	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
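      /* Worked example (illustrative): for r0 % 8 the mask is n - 1 == 7;
	 the sequence masks both r0 and its negation with 7, then the
	 conditional RSB restores the sign, so the result keeps the sign
	 of the dividend as C requires.  */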
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through.  */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
	    *cost += 1;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;
    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Factor out any borrow operation.  There's more than one way
	     of expressing this; try to recognize them all.  */
	  if (GET_CODE (op0) == MINUS)
	    {
	      if (arm_borrow_operation (op1, SImode))
		{
		  op1 = XEXP (op0, 1);
		  op0 = XEXP (op0, 0);
		}
	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
		op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_borrow_operation (XEXP (op1, 0), SImode))
	    op1 = XEXP (op1, 0);
	  else if (GET_CODE (op0) == NEG
		   && arm_borrow_operation (op1, SImode))
	    {
	      /* Negate with carry-in.  For Thumb2 this is done with
		 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
		 RSC instruction that exists in Arm mode.  */
	      if (speed_p)
		*cost += (TARGET_THUMB2
			  ? extra_cost->alu.arith_shift
			  : extra_cost->alu.arith);
	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
	      return true;
	    }
	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
	     Note we do mean ~borrow here.  */
	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
	    {
	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
	      return true;
	    }

	  shift_op = shifter_op_p (op0, &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (op1, &shift_by_reg);
	      non_shift_op = op0;
	    }
	  else
	    non_shift_op = op1;

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (op0))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (op0), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse as we don't want to cost any borrow that
	     we've stripped.  */
	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	  return true;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Handle a side effect of adding in the carry to an addition.  */
	  if (GET_CODE (op0) == PLUS
	      && arm_carry_operation (op1, mode))
	    {
	      op1 = XEXP (op0, 1);
	      op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_carry_operation (op0, mode))
	    {
	      op0 = XEXP (op1, 0);
	      op1 = XEXP (op1, 1);
	    }
	  else if (GET_CODE (op0) == PLUS)
	    {
	      op0 = strip_carry_operation (op0);
	      if (swap_commutative_operands_p (op0, op1))
		std::swap (op0, op1);
	    }

	  if (arm_carry_operation (op0, mode))
	    {
	      /* Adding the carry to a register is a canonicalization of
		 adding 0 to the register plus the carry.  */
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (op0) == MULT)
	    {
	      rtx mul_op = op0;

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
		  return true;
		}
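	      /* Illustrative note (not from the original sources): the
		 pattern matched above corresponds to RTL such as
		   (plus:SI (mult:SI (sign_extend:SI (reg:HI r1))
				     (sign_extend:SI (reg:HI r2)))
			    (reg:SI r3))
		 which maps onto single DSP multiply-accumulate insns
		 like SMLABB; the ASHIFTRT-by-16 forms select the top
		 halfwords, as in SMLATB/SMLATT.  */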
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (op1))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (op1), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse here because we want to test the operands
	     without any carry operation.  */
	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	  return true;
	}

      if (mode == DImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      {
	rtx sub0, sub1;
	if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	  {
	    if (speed_p)
	      *cost += extra_cost->alu.rev;

	    return true;
	  }
	else if (mode == SImode && arm_arch_thumb2
		 && arm_bfi_p (x, &sub0, &sub1))
	  {
	    *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
	    *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
	    if (speed_p)
	      *cost += extra_cost->alu.bfi;

	    return true;
	  }
      }
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
    case EQ:
    case GE:
    case GT:
    case LE:
    case LT:
      /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
	 vcle and vclt).  */
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && (XEXP (x, 1) == CONST0_RTX (mode)))
	{
	  *cost = 0;
	  return true;
	}

      /* Fall through.  */
    case NE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
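      /* Worked example (illustrative): 0x1ffffffff does not survive
	 trunc_int_for_mode (..., SImode), so it is treated as a DImode
	 constant and costed below as two SImode constants -- one for the
	 low 32 bits and one for INTVAL (x) >> 32.  */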
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

    const_int_cost:
      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	  /* Extra costs?  */
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	  /* Extra costs?  */
	}

      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;
= COSTS_N_INSNS (4);
11834 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11835 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11837 if (vfp3_const_double_rtx (x
))
11840 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
11846 if (mode
== DFmode
)
11847 *cost
+= extra_cost
->ldst
.loadd
;
11849 *cost
+= extra_cost
->ldst
.loadf
;
11852 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
11856 *cost
= COSTS_N_INSNS (4);
11861 if (((TARGET_NEON
&& TARGET_HARD_FLOAT
11862 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
11863 || TARGET_HAVE_MVE
)
11864 && simd_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
11865 *cost
= COSTS_N_INSNS (1);
11867 *cost
= COSTS_N_INSNS (4);
11872 /* When optimizing for size, we prefer constant pool entries to
11873 MOVW/MOVT pairs, so bump the cost of these slightly. */
11880 *cost
+= extra_cost
->alu
.clz
;
11884 if (XEXP (x
, 1) == const0_rtx
)
11887 *cost
+= extra_cost
->alu
.log_shift
;
11888 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11891 /* Fall through. */
11895 *cost
+= COSTS_N_INSNS (1);
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11987 rtx op0
= XEXP (x
, 0);
11988 rtx op1
= XEXP (x
, 1);
11989 rtx op2
= XEXP (x
, 2);
11992 /* vfms or vfnma. */
11993 if (GET_CODE (op0
) == NEG
)
11994 op0
= XEXP (op0
, 0);
11996 /* vfnms or vfnma. */
11997 if (GET_CODE (op2
) == NEG
)
11998 op2
= XEXP (op2
, 0);
12000 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
12001 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
12002 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
12005 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
12010 *cost
= LIBCALL_COST (3);
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_VFP5)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }

    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  /* Don't cost a simple reg-reg move at a full insn cost: such moves
     will likely disappear during register allocation.  */
  if (!reload_completed
      && GET_CODE (PATTERN (insn)) == SET
      && REG_P (SET_DEST (PATTERN (insn)))
      && REG_P (SET_SRC (PATTERN (insn))))
    return 2;
  cost = pattern_cost (PATTERN (insn), speed);
  /* If the cost is zero, then it's likely a complex insn.  We don't want the
     cost of these to be less than something we know about.  */
  return cost ? cost : COSTS_N_INSNS (2);
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}
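/* Worked example (illustrative): with the weights above, a
   post-increment address such as [r0], #4 is free (0), [r0, #8] costs
   2, a shifted-index address like [r0, r1, lsl #2] costs 3, a plain
   [r0, r1] costs 4, and a constant-pool SYMBOL_REF costs 10 -- matching
   the preference order stated in the comment.  */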
static int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int *cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
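      /* Illustrative example (not from the original sources): in
	   mov  r2, r3, lsl #2
	   add  r0, r1, r2, lsl #3
	 the ADD's shifted input r2 is written by the MOV, an ALU
	 shift-type insn, so the dependency gets the increased cost
	 computed below.  */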
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
	      || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int *cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int *cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 0;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
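/* For example, on Thumb-1 (!TARGET_32BIT) a DImode value (8 bytes) costs
   (2 * 8) * 1 = 16 when moved to/from LO_REGS and (2 * 8) * 2 = 32 for any
   other class, while sub-word values cost a flat 8.  */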
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
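/* For instance, constructing a V4SI vector element-by-element
   (vec_construct) is costed at TYPE_VECTOR_SUBPARTS / 2 + 1 = 4 / 2 + 1 = 3
   units, independently of the per-CPU cost tables used for the other
   entries.  */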
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
}
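/* Illustrative trace (hypothetical insns): if the ready list, head first,
   is { C:younger, B:neutral, A:older-only }, the scan records first_younger
   at C and first_older_only at A, and the shuffle above yields the issue
   order { A, C, B }: the older-only A now issues ahead of the more
   flexible C, while B keeps its position relative to C.  */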
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((SYMBOL_REF_P (src_mem)
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
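/* Worked example of the encoding above: 1.0 = (-1)^0 * 16 * 2^-4, so
   s = 0, n = 16 and r = 4.  Then A = 0, BCD = 4 XOR 3 = 0b111 and
   EFGH = 16 - 16 = 0b0000, giving the 8-bit index 0b01110000 = 0x70.
   Similarly 0.5 (n = 16, r = 5) encodes as 0x60.  */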
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.cc), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon and MVE instructions.
   Legal immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
			eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
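/* As an example of the table above: the V4SI constant
   { 0x5A, 0x5A, 0x5A, 0x5A } splats to the little-endian byte pattern
   5A 00 00 00 repeated four times and is recognized as variant 0 with
   abcdefgh = 0x5A, while { 0xFFFFFFA5, ... } (its bitwise inverse)
   matches the VMVN form, variant 6.  */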
static int
simd_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Only support 128-bit vectors for MVE.  */
  if (TARGET_HAVE_MVE
      && (!vector
	  || VALID_MVE_PRED_MODE (mode)
	  || n_elts * innersize != 16))
    return -1;

  if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
    return -1;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
   implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
   (or zero for float elements), and a modified constant (whatever should be
   output for a VMOV) in *MODCONST.  "neon_immediate_valid_for_move" function
   is renamed to "simd_immediate_valid_for_move" as this function is used
   both by Neon and MVE.  */
int
simd_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
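/* Typical use from a vector move expander (a sketch only; the emitting
   helper named here is hypothetical):

     rtx modconst;
     int width;
     if (simd_immediate_valid_for_move (op, V4SImode, &modconst, &width))
       emit_vmov_immediate (modconst, width);   // hypothetical helper
*/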
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See simd_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT says whether the shift is a left or right
   shift, because the two have different limits.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
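/* For example, a V8HI constant with every element equal to 3 has
   maxshift == 16, so 3 is accepted both as a left shift (0 <= 3 <= 15)
   and as a right shift (1 <= 3 <= 16), with *elementwidth set to 16.  */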
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width,
					     isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
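/* E.g. for a 4-element mode, parts == 4 and the loop runs twice (i == 2,
   then i == 1): each step pairwise-combines TMPSUM with itself, so after
   the final step every lane of OP0 holds the full reduction and only
   lane 0 is subsequently used.  */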
/* Return a non-NULL RTX iff VALS is a vector constant that can be
   loaded into a register using VDUP.

   If this is the case, and GENERATE is set, we also generate
   instructions to do this and return an RTX to assign to the register.  */

static rtx
neon_vdup_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  if (!generate)
    return x;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}
/* Return a HI representation of CONST_VEC suitable for MVE predicates.  */
rtx
mve_bool_vec_to_const (rtx const_vec)
{
  machine_mode mode = GET_MODE (const_vec);

  if (!VECTOR_MODE_P (mode))
    return const_vec;

  unsigned n_elts = GET_MODE_NUNITS (mode);
  unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
  unsigned shift_c = 16 / n_elts;
  unsigned i;
  unsigned HOST_WIDE_INT hi_val = 0;

  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (const_vec, i);
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el) & ((1U << el_prec) - 1);

      unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;

      hi_val |= elpart << (index * shift_c);
    }
  /* We are using mov immediate to encode this constant which writes 32-bits
     so we need to make sure the top 16-bits are all 0, otherwise we can't
     guarantee we can actually write this immediate.  */
  return gen_int_mode (hi_val, SImode);
}
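/* Sketch of the packing (assuming a 4-element predicate whose inner
   elements have el_prec == 1): shift_c == 16 / 4 == 4, so element i lands
   at bit i * 4 on little-endian, and the constant { 1, 0, 0, 1 } packs
   to 0x1001.  */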
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
   into a register.

   If this is the case, and GENERATE is set, we also generate code to do
   this and return an RTX to copy into the register.  */

rtx
neon_make_constant (rtx vals, bool generate)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
    return mve_bool_vec_to_const (const_vec);
  else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return arm_disable_literal_pool ? NULL_RTX : const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   an error if it doesn't.  EXP indicates the source location, which includes
   the inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error_at (EXPR_LOCATION (exp),
		  "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
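/* E.g. neon_lane_bounds (lane, 0, 4, exp) accepts lanes 0-3 and reports
   "lane 4 out of range 0 - 3" for a constant lane of 4, attributing the
   error to the intrinsic call site when EXP is available.  */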
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB level is 2 if full writeback address modes are allowed, 1
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed and 0 if no writeback at all is supported.  */

static int
arm_coproc_mem_operand_wb (rtx op, int wb_level)
{
  gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need at least restricted
     writeback for POST_INC and PRE_DEC, and full writeback for
     PRE_INC and POST_DEC.  */
  if (wb_level >= 1
      && (GET_CODE (ind) == POST_INC
	  || GET_CODE (ind) == PRE_DEC
	  || (wb_level == 2
	      && (GET_CODE (ind) == PRE_INC
		  || GET_CODE (ind) == POST_DEC))))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb_level == 2
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).

     The encoded immediate for 16-bit modes is multiplied by 2,
     while the encoded immediate for 32-bit and 64-bit modes is
     multiplied by 4.  */
  int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
      && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
    return TRUE;

  return FALSE;
}
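/* For instance, with a HImode operand FACTOR is 2, so the accepted
   (plus (reg) (const)) offsets are the even values in [-510, 510]; for
   SImode and wider FACTOR is 4, giving multiples of 4 in [-1020, 1020].  */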
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int arm_coproc_mem_operand (rtx op, bool wb)
{
  return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
}

/* Return TRUE if OP is a valid coprocessor memory address pattern in a
   context in which no writeback address modes are allowed.  */

int
arm_coproc_mem_operand_no_writeback (rtx op)
{
  return arm_coproc_mem_operand_wb (op, 0);
}
/* This function returns TRUE on matching mode and op.
   1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
   2. For other modes, check for [Rn], return TRUE for Rn < R15 (except
      R13).  */
int
mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
{
  enum rtx_code code;
  HOST_WIDE_INT val;
  int reg_no;

  /* Match: (mem (reg)).  */
  if (REG_P (op))
    {
      reg_no = REGNO (op);
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : reg_no < LAST_ARM_REGNUM)
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  code = GET_CODE (op);

  if (code == POST_INC || code == PRE_DEC
      || code == PRE_INC || code == POST_DEC)
    {
      reg_no = REGNO (XEXP (op, 0));
      return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
	       ? reg_no <= LAST_LO_REGNUM
	       : (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
    }
  else if (((code == POST_MODIFY || code == PRE_MODIFY)
	    && GET_CODE (XEXP (op, 1)) == PLUS
	    && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
	    && REG_P (XEXP (op, 0))
	    && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
	   /* Make sure to only accept PLUS after reload_completed, otherwise
	      this will interfere with auto_inc's pattern detection.  */
	   || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
	       && GET_CODE (XEXP (op, 1)) == CONST_INT))
    {
      reg_no = REGNO (XEXP (op, 0));
      if (code == PLUS)
	val = INTVAL (XEXP (op, 1));
      else
	val = INTVAL (XEXP (XEXP (op, 1), 1));

      switch (mode)
	{
	case E_V16QImode:
	case E_V8QImode:
	case E_V4QImode:
	  if (abs (val) > 127)
	    return FALSE;
	  break;
	case E_V8HImode:
	case E_V8HFmode:
	case E_V4HImode:
	case E_V4HFmode:
	  if (val % 2 != 0 || abs (val) > 254)
	    return FALSE;
	  break;
	case E_V4SImode:
	case E_V4SFmode:
	  if (val % 4 != 0 || abs (val) > 508)
	    return FALSE;
	  break;
	default:
	  return FALSE;
	}
      return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
	      || (MVE_STN_LDW_MODE (mode)
		  ? reg_no <= LAST_LO_REGNUM
		  : (reg_no < LAST_ARM_REGNUM
		     && (code == PLUS || reg_no != SP_REGNUM))));
    }
  return FALSE;
}
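/* So for a V4SImode access an offset of 508 from a suitable base register
   is accepted, while 512 (out of range) and 506 (not a multiple of 4) are
   both rejected.  */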
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)  */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1))
      && REG_P (XEXP (ind, 0))
      && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
   type.  */
int
mve_struct_mem_operand (rtx op)
{
  rtx ind = XEXP (op, 0);

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow only post-increment by the mode size.  */
  if (GET_CODE (ind) == POST_INC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (LABEL_REF_P (ind)
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS; OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */

void
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (SYMBOL_REF_P (x))
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (LABEL_REF_P (x))
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
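/* E.g. bounds [0, 255] give log == 8 and match usat #8; bounds
   [-128, 127] give log == 7 and match ssat #8 (*mask == log + 1 in the
   signed case).  */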
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
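/* E.g. the references [r4, #8] and [r4, #12] are adjacent (same base
   register, offsets differing by exactly 4, in either order), while
   [r4, #8] with [r5, #12] or with [r4, #16] is not.  */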
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, the address register is always modified - either by
	 write-back or by an explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
   or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
   following form:

   [(set (reg:SI <N>) (const_int 0))
    (set (reg:SI <M>) (const_int 0))
    ...
    (unspec_volatile [(const_int 0)]
		     VUNSPEC_CLRM_APSR)
    (clobber (reg:CC CC_REGNUM))
   ]

   Any number (including 0) of set expressions is valid, the volatile unspec is
   optional.  All registers but SP and PC are allowed and registers must be in
   strict increasing order.

   To be a valid VSCCLRM pattern, OP must have the following form:

   [(unspec_volatile [(const_int 0)]
		     VUNSPEC_VSCCLRM_VPR)
    (set (reg:SF <N>) (const_int 0))
    (set (reg:SF <M>) (const_int 0))
    ...
   ]

   As with CLRM, any number (including 0) of set expressions is valid, however
   the volatile unspec is mandatory here.  Any VFP single-precision register is
   accepted but all registers must be consecutive and in increasing order.  */

bool
clear_operation_p (rtx op, bool vfp)
{
  unsigned regno;
  unsigned last_regno = INVALID_REGNUM;
  rtx elt, reg, zero;
  int count = XVECLEN (op, 0);
  int first_set = vfp ? 1 : 0;
  machine_mode expected_mode = vfp ? E_SFmode : E_SImode;

  for (int i = first_set; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);

      if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
	{
	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
	      || XVECLEN (elt, 0) != 1
	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
	      || i != count - 2)
	    return false;

	  continue;
	}

      if (GET_CODE (elt) == CLOBBER)
	continue;

      if (GET_CODE (elt) != SET)
	return false;

      reg = SET_DEST (elt);
      zero = SET_SRC (elt);

      if (!REG_P (reg)
	  || GET_MODE (reg) != expected_mode
	  || zero != CONST0_RTX (SImode))
	return false;

      regno = REGNO (reg);

      if (vfp)
	{
	  if (i != first_set && regno != last_regno + 1)
	    return false;
	}
      else
	{
	  if (regno == SP_REGNUM || regno == PC_REGNUM)
	    return false;
	  if (i != first_set && regno <= last_regno)
	    return false;
	}

      last_regno = regno;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
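
/* Illustration (not from the original source): on a StrongARM
   (arm_ld_sched), a two-register access whose base must be adjusted
   first, i.e.

	add	r0, rbase, #offset	@ add_offset != 0
	ldmia	r0, {r0, r1}

   is rejected by the first test above in favour of two plain ldr
   instructions, while a three- or four-register sequence would still be
   considered profitable.  */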
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i, j;

  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }

      if (order[i] == order[i - 1])
	return false;

      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }

  return true;
}
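
/* A worked example (illustrative, not from the original source): with
   NOPS == 4 and UNSORTED_OFFSETS == {8, 0, 4, 12}, the caller seeds
   ORDER[0] = 1 (the index of offset 0).  The loop then finds offsets 4,
   8 and 12 in turn, producing ORDER == {1, 2, 0, 3}.  Offsets
   {8, 0, 4, 16} would fail, since no offset equals 8 + 4.  */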
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */

static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
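
/* Illustration (not from the original source): for the peephole operand
   layout OPERANDS == {r4, r5, [r0, #0], [r0, #4]} with NOPS == 2, the
   checks above find base register r0, ascending registers {4, 5} and
   offsets {0, 4}, and the function returns 1 (ldmia).  Had the lowest
   offset been 4 instead, the result would be 2 (ldmib, ARM mode only).  */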
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   matcher.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */

static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
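
/* Usage sketch (illustrative, not from the original source): to load four
   consecutive words starting at BASEMEM into r4..r7 with base-register
   update, a caller might write

     int regs[4] = {4, 5, 6, 7};
     HOST_WIDE_INT offset = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
				       basemem, &offset));

   after which OFFSET has been advanced by 16 (4 words of 4 bytes).  */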
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
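
/* Illustration (not from the original source): with INTERLEAVE_FACTOR == 2
   and an unaligned source, each iteration of the main copy loop below emits
   roughly

	ldr	rA, [src, #0]	@ unaligned loads
	ldr	rB, [src, #4]
	str	rA, [dst, #0]	@ unaligned stores
	str	rB, [dst, #4]

   so the second load can issue while the first is still in flight; a
   factor of 1 would alternate load/store with no such overlap.  */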
static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
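
/* Illustration (not from the original source): for LENGTH == 40 and
   BYTES_PER_ITER == 16, the function above copies 32 bytes in two loop
   iterations,

	loop:	<copy 16 bytes>
		add	src_reg, src_reg, #16
		add	dest_reg, dest_reg, #16
		cmp	src_reg, final_src
		bne	loop

   and then emits a straight-line copy of the 8 left-over bytes.  */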
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_cpymemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 awkward.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_cpymemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_cpymemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */

static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
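
/* Illustration (not from the original source): if MEM is an SImode
   reference at [r0, #8], next_consecutive_mem returns an SImode
   reference at [r0, #12], with MEM_OFFSET adjusted by 4 so the alias
   information remains valid.  */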
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_cpymem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_cpymemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx first_reg = NULL_RTX;
      rtx second_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  if (BYTES_BIG_ENDIAN)
	    {
	      second_reg = gen_lowpart (SImode, reg0);
	      first_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	  else
	    {
	      first_reg = gen_lowpart (SImode, reg0);
	      second_reg = gen_highpart_mode (SImode, DImode, reg0);
	    }
	}
      if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
	emit_move_insn (reg0, src);
      else if (src_aligned)
	emit_insn (gen_unaligned_loaddi (reg0, src));
      else
	{
	  emit_insn (gen_unaligned_loadsi (first_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (second_reg, src));
	}

      if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
	emit_move_insn (dst, reg0);
      else if (dst_aligned)
	emit_insn (gen_unaligned_storedi (dst, reg0));
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, first_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, second_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Decompose operands for a 64-bit binary operation in OP1 and OP2
   into its component 32-bit subregs.  OP2 may be an immediate
   constant and we want to simplify it in that case.  */
void
arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
			rtx *lo_op2, rtx *hi_op2)
{
  *lo_op1 = gen_lowpart (SImode, op1);
  *hi_op1 = gen_highpart (SImode, op1);
  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_lowpart_offset (SImode, DImode));
  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_highpart_offset (SImode, DImode));
}
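
/* Illustration (not from the original source): decomposing a DImode pair
   where OP2 is the constant 0x100000001 yields *LO_OP2 == const_int 1
   (the low word) and *HI_OP2 == const_int 1 (the high word), on either
   endianness, letting the caller emit two SImode operations in place of
   one DImode operation.  */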
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
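
/* Illustration (not from the original source): for (EQ && EQ) the
   function above returns CC_DEQmode; for (LT || LE) it returns
   CC_DLEmode, since LE dominates LT; and for (LT || GT) it returns
   CCmode, signalling that no single dominance mode covers both
   conditions.  */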
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (SUBREG_P (y)))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* A widened compare of the sum of a value plus a carry against a
     constant.  This is a representation of RSC.  We want to swap the
     result of the comparison at output.  Not valid if the Z bit is
     needed.  */
  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && arm_borrow_operation (XEXP (x, 1), DImode)
      && CONST_INT_P (y)
      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	   && (op == LE || op == GT))
	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      && (op == LEU || op == GTU))))
    return CC_SWPmode;

  /* If X is a constant we want to use CC_RSBmode.  This is
     non-canonical, but arm_gen_compare_reg uses this to generate the
     correct canonical form.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || SUBREG_P (y))
      && CONST_INT_P (x))
    return CC_RSBmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (SUBREG_P (y)))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NZmode;

  /* A comparison of ~reg with a const is really a special
     canoncialization of compare (~const, reg), which is a reverse
     subtract operation.  We may not get here if CONST is 0, but that
     doesn't matter because ~0 isn't a valid immediate for RSB.  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == NOT
      && CONST_INT_P (y))
    return CC_RSBmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode
      && GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
      && CONST_INT_P (y)
      && UINTVAL (y) == 0x800000000
      && (op == GEU || op == LTU))
    return CC_ADCmode;

  if (GET_MODE (x) == DImode
      && (op == GE || op == LT)
      && GET_CODE (x) == SIGN_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_NVmode;

  if (GET_MODE (x) == DImode
      && (op == GEU || op == LTU)
      && GET_CODE (x) == ZERO_EXTEND
      && ((GET_CODE (y) == PLUS
	   && arm_borrow_operation (XEXP (y, 0), DImode))
	  || arm_borrow_operation (y, DImode)))
    return CC_Bmode;

  if (GET_MODE (x) == DImode
      && (op == EQ || op == NE)
      && (GET_CODE (x) == PLUS
	  || GET_CODE (x) == MINUS)
      && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
      && GET_CODE (y) == SIGN_EXTEND
      && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
    return CC_Vmode;

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two (DImode) things to compare for the condition CODE.  Emit
   the sequence of instructions needed to generate a suitable condition
   code register.  Return the CC register result.  */

static rtx
arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;

  /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
  gcc_assert (TARGET_32BIT);
  gcc_assert (!CONST_INT_P (x));

  rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
				  subreg_highpart_offset (SImode, DImode));
  rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
				  subreg_lowpart_offset (SImode, DImode));
  rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
				  subreg_highpart_offset (SImode, DImode));

  switch (code)
    {
    case EQ:
    case NE:
      {
	if (y_lo == const0_rtx || y_hi == const0_rtx)
	  {
	    if (y_lo != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		gcc_assert (y_hi == const0_rtx);
		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
		if (!arm_add_operand (y_lo, SImode))
		  y_lo = force_reg (SImode, y_lo);
		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
		x_lo = scratch2;
	      }
	    else if (y_hi != const0_rtx)
	      {
		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);

		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
		if (!arm_add_operand (y_hi, SImode))
		  y_hi = force_reg (SImode, y_hi);
		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
		x_hi = scratch2;
	      }

	    if (!scratch)
	      {
		gcc_assert (!reload_completed);
		scratch = gen_rtx_SCRATCH (SImode);
	      }

	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

	    rtx set
	      = gen_rtx_SET (cc_reg,
			     gen_rtx_COMPARE (CC_NZmode,
					      gen_rtx_IOR (SImode, x_lo, x_hi),
					      const0_rtx));
	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
							      clobber)));
	    return cc_reg;
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	if (!arm_add_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
	cc_reg = gen_rtx_REG (mode, CC_REGNUM);

	emit_insn (gen_rtx_SET (cc_reg,
				gen_rtx_COMPARE (mode, conjunction,
						 const0_rtx)));
	return cc_reg;
      }

    case LT:
    case GE:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);

	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
							   cmp1));
	else if (CONST_INT_P (y_hi))
	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
							     y_hi, cmp1));
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
							 cmp1));
	return SET_DEST (single_set (insn));
      }

    case LE:
    case GT:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
								 x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    case LTU:
    case GEU:
      {
	if (y_lo == const0_rtx)
	  {
	    /* If the low word of y is 0, then this is simply a normal
	       compare of the upper words.  */
	    if (!arm_add_operand (y_hi, SImode))
	      y_hi = force_reg (SImode, y_hi);

	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
	  }

	if (!arm_add_operand (y_lo, SImode))
	  y_lo = force_reg (SImode, y_lo);

	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
			 const0_rtx);

	if (!scratch)
	  scratch = gen_rtx_SCRATCH (SImode);
	if (!arm_not_operand (y_hi, SImode))
	  y_hi = force_reg (SImode, y_hi);

	rtx_insn *insn;
	if (y_hi == const0_rtx)
	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
							  cmp1));
	else if (CONST_INT_P (y_hi))
	  {
	    /* Constant is viewed as unsigned when zero-extended.  */
	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
							      y_hi, cmp1));
	  }
	else
	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
							cmp1));
	return SET_DEST (single_set (insn));
      }

    case LEU:
    case GTU:
      {
	/* During expansion, we only expect to get here if y is a
	   constant that we want to handle, otherwise we should have
	   swapped the operands already.  */
	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

	if (!const_ok_for_arm (INTVAL (y_lo)))
	  y_lo = force_reg (SImode, y_lo);

	/* Perform a reverse subtract and compare.  */
	rtx cmp1
	  = gen_rtx_LTU (DImode,
			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
			 const0_rtx);
	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
								x_hi, cmp1));
	return SET_DEST (single_set (insn));
      }

    default:
      gcc_unreachable ();
    }
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    return arm_gen_dicompare_reg (code, x, y, scratch);

  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
  if (mode == CC_RSBmode)
    {
      if (!scratch)
	scratch = gen_rtx_SCRATCH (SImode);
      emit_insn (gen_rsb_imm_compare_scratch (scratch,
					      GEN_INT (~UINTVAL (x)), y));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
16615 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16617 /* Get the base address; addsi3 knows how to handle constants
16618 that require more than one insn. */
16619 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16625 /* Operands[2] may overlap operands[0] (though it won't overlap
16626 operands[1]), that's why we asked for a DImode reg -- so we can
16627 use the bit that does not overlap. */
16628 if (REGNO (operands
[2]) == REGNO (operands
[0]))
16629 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16631 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16633 emit_insn (gen_zero_extendqisi2 (scratch
,
16634 gen_rtx_MEM (QImode
,
16635 plus_constant (Pmode
, base
,
16637 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16638 gen_rtx_MEM (QImode
,
16639 plus_constant (Pmode
, base
,
16641 if (!BYTES_BIG_ENDIAN
)
16642 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16643 gen_rtx_IOR (SImode
,
16646 gen_rtx_SUBREG (SImode
, operands
[0], 0),
16650 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16651 gen_rtx_IOR (SImode
,
16652 gen_rtx_ASHIFT (SImode
, scratch
,
16654 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
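/* Illustrative sketch (compiled out, not part of the port): the hi/lo
   decomposition used above splits an arbitrary offset into a part that fits
   the +/-4095 immediate range of a byte load/store (LO) and a remainder (HI)
   that is materialised separately via addsi3.  Plain C restatement of the
   same arithmetic.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
hi_lo_split_demo (void)
{
  int64_t offset = 0x12345;
  int64_t lo = (offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff));
  /* Sign-extend the remainder from 32 bits, exactly as the code above.  */
  int64_t hi = ((((offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000);

  assert (lo == 0x345);
  assert (hi == 0x12000);
  assert (hi + lo == offset);
}
#endif
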
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}

/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}

/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */
static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}

/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}

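/* Illustrative sketch (compiled out): the ranges accepted above, restated as
   plain C with the target queries replaced by a flag (an assumption made
   purely for the demo).  Thumb-2 LDRD/STRD take a multiple-of-4 offset
   within +/-1020; ARM-state LDRD/STRD take any offset within +/-255.  */
#if 0
#include <assert.h>
#include <stdbool.h>

static bool
demo_offset_ok (long offset, bool thumb2)
{
  long max_offset;
  if (thumb2 && (offset & 3) != 0)
    return false;                       /* must be a multiple of 4 */
  max_offset = thumb2 ? 1020 : 255;
  return offset <= max_offset && offset >= -max_offset;
}

static void
demo (void)
{
  assert (demo_offset_ok (1020, true));
  assert (!demo_offset_ok (1022, true));   /* not a multiple of 4 */
  assert (demo_offset_ok (255, false));
  assert (!demo_offset_ok (-256, false));  /* out of range */
}
#endif
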
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the range, and that the gap between
   the offsets is 4.  If reload is complete, also check that the registers are
   legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
static bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}

/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}

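/* Illustrative sketch (compiled out): with unaligned access enabled a 64-bit
   LDRD/STRD needs only word alignment and a multiple-of-4 offset; without
   it, doubleword alignment and a multiple-of-8 offset are required.  The 32
   below stands in for BITS_PER_WORD on this target.  */
#if 0
#include <stdbool.h>

static bool
demo_align_ok (long align_bits, long offset, bool unaligned_ok)
{
  return unaligned_ok
	 ? (align_bits >= 32 && (offset & 3) == 0)
	 : (align_bits >= 64 && (offset & 7) == 0);
}
/* demo_align_ok (32, 4, true)  -> true
   demo_align_ok (32, 4, false) -> false (needs doubleword alignment)  */
#endif
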
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (SUBREG_P (mem))
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}

/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insns, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free?  */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}

/* Return true if parallel execution of the two word-size accesses provided
   could be satisfied with a single LDRD/STRD instruction.  Two word-size
   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
   register operands and OPERANDS[2,3] are the corresponding memory operands.
   LOAD indicates whether the access is a load or a store.  */
bool
valid_operands_ldrd_strd (rtx *operands, bool load)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset;
  int i, gap;

  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	return false;
    }

  if (offsets[0] > offsets[1])
    return false;

  gap = offsets[1] - offsets[0];
  offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				false, load);
}

/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      {
	char fpstr[20];
	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			 sizeof (fpstr), 0, 1);
	fprintf (f, "%s", fpstr);
      }
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    default:
      fprintf (f, "????");
      return;
    }
}

/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT	    address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT	    forwards;
  HOST_WIDE_INT	    backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}

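/* Illustrative sketch (compiled out): the TBB rounding above.  A TBB table
   has one byte per case, and the table must end on a halfword boundary, so
   an odd byte count is rounded up.  */
#if 0
#include <assert.h>

static void
tbb_size_demo (void)
{
  long size = 5;                 /* five one-byte entries  */
  size = (size + 1) & ~1L;       /* round up to a halfword */
  assert (size == 6);
}
#endif
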
/* Emit insns to load the function address from FUNCDESC (an FDPIC
   function descriptor) into a register and the GOT address into the
   FDPIC register, returning an rtx for the register holding the
   function address.  */
rtx
arm_load_function_descriptor (rtx funcdesc)
{
  rtx fnaddr_reg = gen_reg_rtx (Pmode);
  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

  emit_move_insn (fnaddr_reg, fnaddr);

  /* The ABI requires the entry point address to be loaded first, but
     since we cannot support lazy binding for lack of atomic load of
     two 32-bits values, we do not need to bother to prevent the
     previous load from being moved after that of the GOT address.  */
  emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));

  return fnaddr_reg;
}

/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label).levels[0].log;
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}

/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

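/* Illustrative sketch (compiled out): the forward-range window computed
   above.  A fix at ADDRESS whose insn can reach FORWARDS bytes ahead can use
   a pool entry placed no further than ADDRESS + FORWARDS - MINIPOOL_PAD.  */
#if 0
#include <assert.h>

static void
minipool_window_demo (void)
{
  long address = 1000;   /* insn offset into the function            */
  long forwards = 4096;  /* pool_range attribute of the insn         */
  long pad = 4;          /* minipool_pad when 8-byte entries appear  */
  long max_address = address + forwards - pad;
  assert (max_address == 5092);
}
#endif
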
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}

/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}

static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}

/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}

/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}

/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}

/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}

/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}

/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}

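/* Illustrative sketch (compiled out): costing a 64-bit constant by parts, as
   above, looks at the two 32-bit halves independently.  For example the
   DImode constant 0x0000000100000001 has lowpart 0x1 and highpart 0x1, each
   a valid ARM immediate, so each half can be set with one insn.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
const_double_split_demo (void)
{
  uint64_t val = 0x0000000100000001ull;
  uint32_t lowpart = (uint32_t) val;
  uint32_t highpart = (uint32_t) (val >> 32);
  assert (lowpart == 1 && highpart == 1);
}
#endif
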
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}

/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}

/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }
}

/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)
{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 fields size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}

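/* Illustrative sketch (compiled out): the padding-mask arithmetic used
   above.  ((uint32_t)-1) - (1u << b) + 1 yields a mask of all bits from bit
   B upwards, i.e. the tail of a 32-bit argument register that a field does
   not occupy.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
padding_mask_demo (void)
{
  int last_used_bit = 8;
  uint32_t mask = ((uint32_t) -1) - ((uint32_t) 1 << last_used_bit) + 1;
  assert (mask == 0xffffff00u);   /* bits 8..31 */
}
#endif
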
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)
{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT mask
		= HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}

/* Clear the secret data held in registers before doing a cmse_nonsecure_call
   or returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP
   indicates which registers are to be fully cleared, using the value in
   register CLEARING_REG if more efficient.  The PADDING_BITS_LEN entries
   array PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in
   caller-saved core registers, with SCRATCH_REG used as a scratch register
   for that clearing.

   NOTE: one of the three following assertions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
     in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */
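/* As a sketch (illustrative, assuming a padding mask of 0xff00ff00 for r0
   and ip as SCRATCH_REG), the padding-bit clearing below emits something
   along the lines of:

       mov  ip, #0x00ff        @ lower half of ~0xff00ff00
       movt ip, #0x00ff        @ upper half, via the zero_extract set
       and  r0, r0, ip

   The exact instructions depend on the target; Thumb-1 uses the
   CLEARING_REG shuffle described in the NOTE above.  */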
static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
  bool saved_clearing = false;
  rtx saved_clearing_reg = NULL_RTX;
  int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;

  gcc_assert (arm_arch_cmse);

  if (!bitmap_empty_p (to_clear_bitmap))
    {
      minregno = bitmap_first_set_bit (to_clear_bitmap);
      maxregno = bitmap_last_set_bit (to_clear_bitmap);
    }
  clearing_regno = REGNO (clearing_reg);

  /* Clear padding bits.  */
  gcc_assert (padding_bits_len <= NUM_ARG_REGS);
  for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
    {
      uint32_t mask;
      rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);

      if (padding_bits_to_clear[i] == 0)
	continue;

      /* If this is a Thumb-1 target and SCRATCH_REG is not a low register,
	 use CLEARING_REG as scratch.  */
      if (TARGET_THUMB1
	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
	{
	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
	     such that we can use clearing_reg to clear the unused bits in the
	     arguments.  */
	  if ((clearing_regno > maxregno
	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
	      && !saved_clearing)
	    {
	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
	      emit_move_insn (scratch_reg, clearing_reg);
	      saved_clearing = true;
	      saved_clearing_reg = scratch_reg;
	    }
	  scratch_reg = clearing_reg;
	}

      /* Fill the lower half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) & 0xFFFF;
      emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));

      /* Fill the top half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) >> 16;
      rtx16 = gen_int_mode (16, SImode);
      dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
      if (mask)
	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));

      emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
    }
  if (saved_clearing)
    emit_move_insn (clearing_reg, saved_clearing_reg);
  /* Clear full registers.  */

  if (TARGET_HAVE_FPCXT_CMSE)
    {
      int i, j, k, nb_regs;
      rtx use_seq, par, reg, set, vunspec;
      rtvec vunspec_vec;
      int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
      auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
      auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);

      for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
	{
	  /* Find next register to clear and exit if none.  */
	  for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
	  if (i > maxregno)
	    break;

	  /* Compute number of consecutive registers to clear.  */
	  for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
	       j++);
	  nb_regs = j - i;

	  /* Create VSCCLRM RTX pattern.  */
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
	  vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
	  vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
					     VUNSPEC_VSCCLRM_VPR);
	  XVECEXP (par, 0, 0) = vunspec;

	  /* Insert VFP register clearing RTX in the pattern.  */
	  start_sequence ();
	  for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
	    {
	      if (!bitmap_bit_p (to_clear_bitmap, j))
		continue;

	      reg = gen_rtx_REG (SFmode, j);
	      set = gen_rtx_SET (reg, const0_rtx);
	      XVECEXP (par, 0, k++) = set;
	      emit_use (reg);
	    }
	  use_seq = get_insns ();
	  end_sequence ();

	  emit_insn_after (use_seq, emit_insn (par));
	}
      /* Get set of core registers to clear.  */
      bitmap_clear (core_regs_bitmap);
      bitmap_set_range (core_regs_bitmap, R0_REGNUM,
			IP_REGNUM - R0_REGNUM + 1);
      bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
		  core_regs_bitmap);
      gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));

      if (bitmap_empty_p (to_clear_core_bitmap))
	return;

      /* Create clrm RTX pattern.  */
      nb_regs = bitmap_count_bits (to_clear_core_bitmap);
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));

      /* Insert core register clearing RTX in the pattern.  */
      start_sequence ();
      for (j = 0, i = minregno; j < nb_regs; i++)
	{
	  if (!bitmap_bit_p (to_clear_core_bitmap, i))
	    continue;

	  reg = gen_rtx_REG (SImode, i);
	  set = gen_rtx_SET (reg, const0_rtx);
	  XVECEXP (par, 0, j++) = set;
	  emit_use (reg);
	}

      /* Insert APSR register clearing RTX in the pattern
       * along with clobbering CC.  */
      vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
      vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
					 VUNSPEC_CLRM_APSR);

      XVECEXP (par, 0, j++) = vunspec;

      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
      XVECEXP (par, 0, j) = clobber;

      use_seq = get_insns ();
      end_sequence ();

      emit_insn_after (use_seq, emit_insn (par));
    }
  else
    {
      /* If not marked for clearing, clearing_reg already does not contain
	 any secret.  */
      if (clearing_regno <= maxregno
	  && bitmap_bit_p (to_clear_bitmap, clearing_regno))
	{
	  emit_move_insn (clearing_reg, const0_rtx);
	  emit_use (clearing_reg);
	  bitmap_clear_bit (to_clear_bitmap, clearing_regno);
	}

      for (regno = minregno; regno <= maxregno; regno++)
	{
	  if (!bitmap_bit_p (to_clear_bitmap, regno))
	    continue;

	  if (IS_VFP_REGNUM (regno))
	    {
	      /* If regno is an even vfp register and its successor is also to
		 be cleared, use vmov.  */
	      if (TARGET_VFP_DOUBLE
		  && VFP_REGNO_OK_FOR_DOUBLE (regno)
		  && bitmap_bit_p (to_clear_bitmap, regno + 1))
		{
		  emit_move_insn (gen_rtx_REG (DFmode, regno),
				  CONST1_RTX (DFmode));
		  emit_use (gen_rtx_REG (DFmode, regno));
		  regno++;
		}
	      else
		{
		  emit_move_insn (gen_rtx_REG (SFmode, regno),
				  CONST1_RTX (SFmode));
		  emit_use (gen_rtx_REG (SFmode, regno));
		}
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
	      emit_use (gen_rtx_REG (SImode, regno));
	    }
	}
    }
}
/* Clear core and caller-saved VFP registers not used to pass arguments before
   a cmse_nonsecure_call.  Saving, clearing and restoring of VFP callee-saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.  See
   libgcc/config/arm/cmse_nonsecure_call.S.  */
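/* For reference, a call this pass rewrites is one made through a function
   pointer carrying the CMSE attribute, e.g. (illustrative user code, not
   part of the compiler):

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn (int);
       void f (ns_fn *fn) { fn (42); }
*/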
static void
cmse_nonsecure_call_inline_register_clear (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
	  /* frame = VFP regs + FPSCR + VPR.  */
	  unsigned lazy_store_stack_frame_size
	    = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
	  unsigned long callee_saved_mask
	    = ((1 << (LAST_HI_REGNUM + 1)) - 1)
	      & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
	  unsigned address_regnum, regno;
	  unsigned max_int_regno
	    = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
	  unsigned max_fp_regno
	    = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
	  unsigned maxregno
	    = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
	  auto_sbitmap to_clear_bitmap (maxregno + 1);
	  rtx_insn *seq;
	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Mark registers that need to be cleared.  Those that hold a
	     parameter are removed from the set further below.  */
	  bitmap_clear (to_clear_bitmap);
	  bitmap_set_range (to_clear_bitmap, R0_REGNUM,
			    max_int_regno - R0_REGNUM + 1);

	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (!lazy_fpclear)
	    {
	      auto_sbitmap float_bitmap (maxregno + 1);

	      bitmap_clear (float_bitmap);
	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
				max_fp_regno - FIRST_VFP_REGNUM + 1);
	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  address_regnum = REGNO (XEXP (address, 0));
	  if (address_regnum <= max_int_regno)
	    bitmap_clear_bit (to_clear_bitmap, address_regnum);

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());
	  /* Walk through all arguments and clear registers appropriately.  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      uint64_t to_clear_args_mask;

	      if (VOID_TYPE_P (arg_type))
		continue;

	      function_arg_info arg (arg_type, /*named=*/true);
	      if (!first_param)
		/* ??? We should advance after processing the argument and pass
		   the argument we're advancing past.  */
		arm_function_arg_advance (args_so_far, arg);

	      arg_rtx = arm_function_arg (args_so_far, arg);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_args_mask
		= compute_not_to_clear_mask (arg_type, arg_rtx,
					     REGNO (arg_rtx),
					     &padding_bits_to_clear[0]);
	      if (to_clear_args_mask)
		{
		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
		    if (to_clear_args_mask & (1ULL << regno))
		      bitmap_clear_bit (to_clear_bitmap, regno);
		}

	      first_param = false;
	    }

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  clearing_reg = XEXP (address, 0);
	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
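	  /* For example (illustrative), on Thumb-1 the two sets above
	     assemble to:

		 lsrs r4, r4, #1
		 lsls r4, r4, #1

	     which clears bit 0 of the address register without the extra
	     register a bic-based sequence would need.  */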
	  if (clear_callee_saved)
	    {
	      rtx_insn *push_insn =
		emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
	      /* Disable frame debug info in push because it needs to be
		 disabled for pop (see below).  */
	      RTX_FRAME_RELATED_P (push_insn) = 0;

	      /* Lazy store multiple.  */
	      if (lazy_fpclear)
		{
		  rtx imm;
		  rtx_insn *add_insn;

		  imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
		  add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
						    stack_pointer_rtx, imm));
		  /* If we have the frame pointer, then it will be the
		     CFA reg.  Otherwise, the stack pointer is the CFA
		     reg, so we need to emit a CFA adjust.  */
		  if (!frame_pointer_needed)
		    arm_add_cfa_adjust_cfa_note (add_insn,
						 - lazy_store_stack_frame_size,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
		}
	      /* Save VFP callee-saved registers.  */
	      else
		{
		  vfp_emit_fstmd (D7_VFP_REGNUM + 1,
				  (max_fp_regno - D7_VFP_REGNUM) / 2);
		  /* Disable frame debug info in push because it needs to be
		     disabled for vpop (see below).  */
		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
		}
	    }

	  /* Clear caller-saved registers that leak before doing a non-secure
	     call.  */
	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
				NUM_ARG_REGS, ip_reg, clearing_reg);

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	  if (TARGET_HAVE_FPCXT_CMSE)
	    {
	      rtx_insn *last, *pop_insn, *after = insn;

	      start_sequence ();

	      /* Lazy load multiple done as part of libcall in Armv8-M.  */
	      if (lazy_fpclear)
		{
		  rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
		  emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
		  rtx_insn *add_insn =
		    emit_insn (gen_addsi3 (stack_pointer_rtx,
					   stack_pointer_rtx, imm));
		  if (!frame_pointer_needed)
		    arm_add_cfa_adjust_cfa_note (add_insn,
						 lazy_store_stack_frame_size,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		}
	      /* Restore VFP callee-saved registers.  */
	      else
		{
		  int nb_callee_saved_vfp_regs =
		    (max_fp_regno - D7_VFP_REGNUM) / 2;
		  arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
					      nb_callee_saved_vfp_regs,
					      stack_pointer_rtx);
		  /* Disable frame debug info in vpop because the SP adjustment
		     is made using a CFA adjustment note while CFA used is
		     sometimes R7.  This then causes an assert failure in the
		     CFI note creation code.  */
		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
		}

	      arm_emit_multi_reg_pop (callee_saved_mask);
	      pop_insn = get_last_insn ();

	      /* Disable frame debug info in pop because they reset the state
		 of popped registers to what it was at the beginning of the
		 function, before the prologue.  This leads to incorrect state
		 when doing the pop after the nonsecure call for registers that
		 are pushed both in prologue and before the nonsecure call.

		 It also occasionally triggers an assert failure in CFI note
		 creation code when there are two codepaths to the epilogue,
		 one of which does not go through the nonsecure call.
		 Obviously this means that debugging between the push and pop
		 is not reliable.  */
	      RTX_FRAME_RELATED_P (pop_insn) = 0;

	      seq = get_insns ();
	      last = get_last_insn ();
	      end_sequence ();

	      emit_insn_after (seq, after);

	      /* Skip pop we have just inserted after nonsecure call, we know
		 it does not contain a nonsecure call.  */
	      insn = last;
	    }
	}
    }
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */
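/* For instance (an illustrative sketch), in a block ending with

       movs r1, r0          @ the flag-setting candidate found below
       ...
       cmp  r1, #0          @ comparison inside cbranchsi4_insn
       beq  .L2

   the move can be rewritten as "subs r1, r0, #0", which already sets the
   condition codes and lets a later pass delete the compare.  */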
static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */
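/* For example (illustrative): in Thumb-2 "add r0, r0, r1" needs a 32-bit
   encoding, while the flag-setting "adds r0, r0, r1" fits in 16 bits.
   When the condition codes are dead at that point, rewriting the SET into
   a PARALLEL of the SET plus a CC clobber lets the 'adds' pattern be
   selected.  */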
static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
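/* A sketch of the transformation (illustrative): a constant load such as

       ldr r0, =0x12345678

   becomes a pc-relative load from a minipool dumped after a barrier that
   lies within the load's addressing range:

       ldr r0, .LP0
       ...
       b   .L1
   .LP0:
       .word 0x12345678
   .L1:
*/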
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_inline_register_clear ();

  /* We cannot run the Thumb passes for thunks because there is no CFG.  */
  if (cfun->is_thunk)
    ;
  else if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it
     should no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
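/* For example (illustrative): a register list {r4, r5, pc} with an SP base
   and UPDATE set is printed as "pop {r4, r5, pc}", while the same list
   with a non-SP base register r7 and no update comes out as
   "ldm r7, {r4, r5, pc}".  */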
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset = 0;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 -(count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
/* Returns true if -mcmse has been passed and the function pointed to by
   'addr' has the cmse_nonsecure_call attribute and returns false
   otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && SYMBOL_REF_P (addr)
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register (NULL_RTX, false /*compute_now*/);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_FDPIC)
    {
      rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC
	 clobber is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];

  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      if (count)
	*count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);
      const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (can_ldrd
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (can_ldrd);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (can_ldrd);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      gcc_assert (can_ldrd);
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (can_ldrd
		      && (TARGET_THUMB2
			  || !CONST_INT_P (otherops[2])
			  || (INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (emit)
			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (can_ldrd
		  && (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256)))
		{
		  if (emit)
		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditions.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (can_ldrd)
		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (can_ldrd)
		return "ldrd%?\t%0, [%1]";

	      return "ldmia%?\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      /* For TARGET_ARM the first source register of an STRD
	 must be even.  This is usually the case for double-word
	 values but user assembly constraints can force an odd
	 starting register.  */
      bool allow_strd = TARGET_LDRD
			&& !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%?\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than strd can handle,
	     fix these up with a pair of str.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (allow_strd
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldmia%?\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%?\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
	      || mode == SFmode
	      || mode == DFmode
	      || mode == HImode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : sp ? "32" : "16",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case PLUS:
      if (GET_CODE (addr) == PLUS)
	addr = XEXP (addr, 0);
      /* Fall through.  */
    case LABEL_REF:
      {
	int i;
	int overlap = -1;

	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
		  sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
		else
		  sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
	    else
	      sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case E_EImode:
	case E_OImode:
	  return 8;
	case E_CImode:
	  return 12;
	case E_XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
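/* For example (illustrative): with N = 0x10004 the constant cannot be
   encoded in a single ARM immediate, so the loop below emits one
   instruction per 8-bit-aligned chunk:

       add r0, r1, #4
       add r0, r0, #65536   @ 0x10000

   using INSTR1 for the first chunk and INSTR2 for the rest.  */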
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
21063 if (code
== ROTATERT
)
21064 /* Rotate is just modulo 32. */
21066 else if (*amountp
!= (*amountp
& 31))
21068 if (code
== ASHIFT
)
21073 /* Shifts of 0 are no-ops. */
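/* For example (a sketch of the two interesting cases): given
   (mult (reg:SI r1) (const_int 8)) this stores 3 in *AMOUNTP and returns
   "lsl", since multiplication by a power of two is printed as a left
   shift; given (rotate (reg:SI r1) (const_int 8)) it rewrites the code as
   ROTATERT with amount 32 - 8 = 24 and returns "ror".  */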
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.cc.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (reg_needs_saving_p (reg))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
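/* Note: with the definition of EH_RETURN_DATA_REGNO usually found in
   arm.h (N for N < 4, INVALID_REGNUM otherwise -- stated here as an
   assumption, check the target header), the loop above adds r0-r3 to the
   mask for any function that calls __builtin_eh_return.  */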
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
               || flag_stack_clash_protection)
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  if (arm_current_function_pac_enabled_p ())
    save_reg_mask |= 1 << IP_REGNUM;

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 - r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_VFP_BASE)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
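/* As an illustration: a function in which d8-d15 are live saves one
   contiguous run of eight D registers, i.e. 8 * 8 = 64 bytes.  On a
   pre-Armv6 core the ARM10 VFPr1 workaround above pads a run of exactly
   two D registers to three, so a run of {d8, d9} is counted as 24 bytes
   rather than 16.  */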
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;
  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && !IS_CMSE_ENTRY (func_type)
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |= (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5t && TARGET_ARM)
                sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                }
            }
          /* For interrupt returns we have to use an LDM rather than
             a POP so that we can use the exception return variant.  */
          else if (IS_INTERRUPT (func_type))
            sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
          else
            sprintf (instr, "pop%s\t{", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }
      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          gcc_assert (arm_arch5t || arm_arch4t);
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;
        default:
          if (IS_CMSE_ENTRY (func_type))
            {
              /* For Armv8.1-M, this is cleared as part of the CLRM instruction
                 emitted by cmse_nonsecure_entry_clear_before_return () and the
                 VSTR/VLDR instructions in the prologue and epilogue.  */
              if (!TARGET_HAVE_FPCXT_CMSE)
                {
                  /* Check if we have to clear the 'GE bits', which are only
                     used if parallel add and subtraction instructions are
                     available.  */
                  if (TARGET_INT_SIMD)
                    snprintf (instr, sizeof (instr),
                              "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
                  else
                    snprintf (instr, sizeof (instr),
                              "msr%s\tAPSR_nzcvq, %%|lr", conditional);

                  output_asm_insn (instr, & operand);
                  /* Do not clear FPSCR if targeting Armv8.1-M Mainline; VLDR
                     takes care of it.  */
                  if (TARGET_HARD_FLOAT)
                    {
                      /* Clear the cumulative exception-status bits (0-4,7)
                         and the condition code bits (28-31) of the FPSCR.
                         We need to remember to clear the first scratch
                         register used (IP) and save and restore the second
                         (r4).

                         Important note: the length of the
                         thumb2_cmse_entry_return insn pattern must account
                         for the size of the below instructions.  */
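                      /* A sanity check of the constants used below:
                         65376 is 0xff60 and 4095 is 0x0fff, so after the
                         movw/movt pair r4 holds the mask 0x0fffff60, in
                         which exactly bits 0-4, 7 and 28-31 are clear.  */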
                      output_asm_insn ("push\t{%|r4}", & operand);
                      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
                      output_asm_insn ("movw\t%|r4, #65376", & operand);
                      output_asm_insn ("movt\t%|r4, #4095", & operand);
                      output_asm_insn ("and\t%|ip, %|r4", & operand);
                      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
                      output_asm_insn ("pop\t{%|r4}", & operand);
                      output_asm_insn ("mov\t%|ip, %|lr", & operand);
                    }
                }
              snprintf (instr, sizeof (instr), "bxns\t%%|lr");
            }
          /* Use bx if it's available.  */
          else if (arm_arch5t || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
                                    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is ((pc[-3]) & ~0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
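/* A worked example (for a hypothetical function named "foo"): LENGTH is
   4 (three characters plus the trailing NUL), which is already word
   aligned, so the marker word emitted is 0xff000000 + 4 = 0xff000004 and
   a backtracer recovers the name length as 0xff000004 & ~0xff000000.  */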
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
               (HOST_WIDE_INT) crtl->args.size,
               crtl->args.pretend_args_size,
               (HOST_WIDE_INT) get_frame_size ());
  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
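/* For instance (an illustration, not verified compiler output): pushing
   {r4, r5, r6} saves an odd number of registers, so r4 is stored first
   by a single STR that also performs the whole 12-byte stack decrement,
   leaving SP dword aligned for one STRD of the remaining pair:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */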
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;
  bool first = true;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
      first = false;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        unsigned regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (first)
          {
            rtx insn;

            first = false;
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using pre-indexed
   addressing with writeback.  All other stores use offset addressing.  If no
   STRD can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more
   scheduling freedom and can be turned into an STM by peephole
   optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: More efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          /* NOTE: The dwarf code emitter handles reg-reg copies correctly;
             in the following example the reg-reg copy of SP to IP is
             handled through the .cfi_def_cfa_register directive and the
             .cfi_offset directive for the IP register is skipped by the
             dwarf code emitter.
             Example:
                mov     ip, sp
                .cfi_def_cfa_register 12
                push    {fp, ip, lr, pc}
                .cfi_offset 11, -16
                .cfi_offset 13, -12
                .cfi_offset 14, -8

             Whereas the Arm-specific .save directive handling differs from
             that of the dwarf code emitter and doesn't consider reg-reg
             copies while updating the register list.  When PACBTI is
             enabled we manually update the .save directive register list
             to use "ra_auth_code" (pseudo register 143) instead of the IP
             register, as shown in the following pseudo code.
             Example:
                mov     ip, sp
                .cfi_def_cfa_register 12
                mov     ip, lr
                .cfi_register 143, 12
                push    {r3, r7, ip, lr}
                .save   {r3, r7, ra_auth_code, lr}
          */
          rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
          if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
            dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 dwarf_reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
          if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
            dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               dwarf_reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
        if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
          dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
                                              NULL_RTX);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
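/* As an illustration, for SAVED_REGS_MASK covering {r4, r5, pc} this
   builds a single parallel holding a return, an SP increment of 12 and
   three loads at offsets 0, 4 and 8 -- the RTL shape of a
   "pop {r4, r5, pc}" epilogue (a sketch, not verified output).  */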
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
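/* For example, restoring 20 consecutive D registers exceeds the
   16-register limit of a single pop_multi, so the recursion above splits
   the restore into one pop of the first 16 registers followed by one pop
   of the remaining 4.  */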
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, LDRD patterns are created
   for all the register pairs.  If an odd number of registers is popped, the
   last register is loaded with a plain LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the
               registers to be loaded are generated in above given LDRD
               pattern, and the pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first
     and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It
   uses offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for
               which LDRD can be generated.  PC is always the last register
               popped, and we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */

            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!crtl->is_leaf
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* Return true if CALL is an indirect tail call; when it is, we cannot
   tell whether r3 will be available.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx_insn *call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

   For a given function some or all of these stack components
   may not be needed, giving rise to the possibility of
   eliminating some of the registers.

   The values returned by this function must reflect the behavior
   of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

   The sign of the number returned reflects the direction of stack
   growth, so the values are positive for all eliminations except
   from the soft frame pointer to the hard frame pointer.

   SFP may point just inside the local variables block to ensure correct
   alignment.  */
22946 static arm_stack_offsets
*
22947 arm_get_frame_offsets (void)
22949 struct arm_stack_offsets
*offsets
;
22951 offsets
= &cfun
->machine
->stack_offsets
;
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (reg_needs_saving_p (regno))
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_VFP_BASE)
        saved += arm_get_vfp_saved_size ();

      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
         nonsecure entry functions with VSTR/VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
        saved += 4;
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && !cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
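/* The doubleword-alignment arithmetic above is the usual power-of-two
   rounding idiom.  A minimal standalone sketch of the same computation
   (not compiled; the function name is invented for illustration):  */
#if 0
/* Round BYTES up to the next multiple of 8, as the layout code does for
   the soft frame pointer and the outgoing argument block.  BYTES is
   assumed already word (4-byte) aligned, since the stack only ever moves
   in whole words.  */
static unsigned int
example_round_up_to_doubleword (unsigned int bytes)
{
  if (bytes & 7)        /* Misaligned by exactly one word.  */
    bytes += 4;
  return bytes;
}
#endif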
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */

          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  if (TARGET_REALLY_IWMMXT)
    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
      if (reg_needs_saving_p (reg))
        {
          insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = gen_rtx_MEM (V2SImode, insn);
          insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
          RTX_FRAME_RELATED_P (insn) = 1;
          saved_size += 8;
        }

  if (TARGET_VFP_BASE)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}
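/* The VFP loop above batches maximal runs of adjacent registers that need
   saving into single vfp_emit_fstmd calls.  A simplified sketch of the same
   scan over a plain per-register bitmask (not compiled; names invented for
   illustration -- the real loop walks register *pairs*):  */
#if 0
/* Invoke EMIT (start, count) for each maximal run of set bits in MASK.  */
static void
example_emit_runs (unsigned int mask, void (*emit) (int start, int count))
{
  int start = 0;
  for (int reg = 0; reg <= 32; reg++)
    if (reg == 32 || !(mask & (1u << reg)))
      {
        if (start != reg)           /* Flush the run [start, reg).  */
          emit (start, reg - start);
        start = reg + 1;
      }
}
#endif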
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
                               unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
        if (regno1 != i && (live_regs & (1 << i)) != 0)
          {
            regno = i;
            break;
          }

      if (regno < 0)
        {
          /* If IP is used as the 1st scratch register for a nested function,
             then either r3 wasn't available or is used to preserve IP.  */
          if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
            regno1 = 3;
          regno = (regno1 == 3 ? 2 : 3);
          sr->saved
            = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                               regno);
        }
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
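/* In ARM assembly terms, the save/release pair above amounts to the classic
   push/pop idiom (register name purely illustrative):

        str     r4, [sp, #-4]!    @ get_scratch_register_on_entry
        ...
        ldr     r4, [sp], #4      @ release_scratch_register_on_entry  */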
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                            unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (reg1);
        }

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
        }
      else
        emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          HOST_WIDE_INT rem = size - rounded_size;

          if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
            {
              emit_set_insn (sr.reg,
                             plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
              emit_stack_probe (plus_constant (Pmode, sr.reg,
                                               PROBE_INTERVAL - rem));
            }
          else
            emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
        }

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
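/* A worked example of the probe placement above (numbers invented for
   illustration, and assuming an ARM rather than Thumb-2 target so the
   4095-byte offset limit applies): with PROBE_INTERVAL == 4096,
   FIRST == 4096 and SIZE == 10000 we take the middle branch, probing at
   SP - 8192 and then SP - 12288 in the unrolled loop.  The remainder
   10000 - 8192 == 1808 fits in a single offset, so the final probe lands
   at SP - 14096, i.e. exactly FIRST + SIZE below the incoming stack
   pointer.  */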
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
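/* The assembly emitted above forms this loop (register choice illustrative;
   %0 and %1 stand for the TEST_ADDR and LAST_ADDR registers chosen by the
   caller, and PROBE_INTERVAL is assumed to be 4096 here):

        .LPSRL0:
                sub     r4, r4, #4096
                str     r0, [r4, #0]
                cmp     r4, r5
                bne     .LPSRL0  */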
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which registers we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = (IS_NESTED (func_type)
                && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
                     || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
                          || flag_stack_clash_protection)
                         && !df_regs_ever_live_p (LR_REGNUM)
                         && arm_r3_live_at_start_p ()))
                    || arm_current_function_pac_enabled_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
          arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
          onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
          push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
        insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
        {
          rtx addr, dwarf;

          saved_regs += 4;

          addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
          fp_offset = 4;

          /* Just tell the dwarf backend that we adjusted SP.  */
          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -fp_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          if (arm_current_function_pac_enabled_p ())
            cfun->machine->pacspval_needed = 1;
        }
      else
        {
          /* Store the args on the stack.  */
          if (cfun->machine->uses_anonymous_args)
            {
              insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                          (0xf0 >> (args_to_push / 4)) & 0xf);
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
              saved_pretend_args = 1;
            }
          else
            {
              rtx addr, dwarf;

              if (args_to_push == 4)
                addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              else
                addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          -args_to_push));

              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }

          RTX_FRAME_RELATED_P (insn) = 1;
          fp_offset = args_to_push;
          args_to_push = 0;
          if (arm_current_function_pac_enabled_p ())
            cfun->machine->pacspval_needed = 1;
        }
    }

  if (arm_current_function_pac_enabled_p ())
    {
      /* If IP was clobbered we only emit a PAC instruction as the BTI
         one will be added before the push of the clobbered IP (if
         necessary) by the bti pass.  */
      if (aarch_bti_enabled () && !clobber_ip)
        insn = emit_insn (gen_pacbti_nop ());
      else
        insn = emit_insn (gen_pac_nop ());

      rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, dwarf);
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
  if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
    {
      saved_regs += 4;
      insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
                                                GEN_INT (FPCXTNS_ENUM)));
      rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx, -4));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating the extra push
     of IP (needed when a frame is created and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          insn = GEN_INT (saved_regs - (4 + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
          || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
        regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
        regno = LR_REGNUM;
      else
        regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            arm_emit_probe_stack_range (get_stack_check_protect (),
                                        size - get_stack_check_protect (),
                                        regno, live_regs_mask);
        }
      else if (size > 0)
        arm_emit_probe_stack_range (get_stack_check_protect (), size,
                                    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
        insn = gen_rtx_REG (SImode, 3);
      else
        {
          insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
          insn = gen_frame_mem (SImode, insn);
        }
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        emit_insn (gen_stack_tie (stack_pointer_rtx,
                                  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask, NULL_RTX);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
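/* For orientation, a typical non-APCS ARM prologue produced by the code
   above might look like this (registers and constants purely illustrative:
   four registers saved, so the frame pointer lands at sp + 12):

        push    {r4, r5, fp, lr}        @ live_regs_mask stores
        add     fp, sp, #12             @ create the frame pointer
        sub     sp, sp, #16             @ allocate locals + outgoing args  */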
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
   Letters previously used, but now deprecated/obsolete: sWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
   If CODE is 'V', then the operand must be a CONST_INT representing
   the bits to preserve in the modified register (Rd) of a BFI or BFC
   instruction: print out both the width and lsb (shift) fields.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      return;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
        HOST_WIDE_INT val;

        if (!CONST_INT_P (x)
            || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        else
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 'V':
      {
        /* Output the LSB (shift) and width for a bitmask instruction
           based on a literal mask.  The LSB is printed first,
           followed by the width.

           Eg. For 0b1...1110001, the result is #1, #3.  */
        if (!CONST_INT_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        unsigned HOST_WIDE_INT val
          = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
        int lsb = exact_log2 (val & -val);
        asm_fprintf (stream, "#%d, #%d", lsb,
                     (exact_log2 (val + (val & -val)) - lsb));
      }
      return;

    case 's':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        machine_mode mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        rtx postinc_reg = NULL;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        if (GET_CODE (addr) == POST_MODIFY)
          {
            postinc_reg = XEXP( XEXP (addr, 1), 1);
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
        if (postinc_reg)
          asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
       rtx_code, the memory operand output looks like the following.
       1.  [Rn], #+/-<imm>
       2.  [Rn, #+/-<imm>]!
       3.  [Rn, #+/-<imm>]
       4.  [Rn].  */
    case 'E':
      {
        rtx addr;
        rtx postinc_reg = NULL;
        unsigned inc_val = 0;
        enum rtx_code code;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        code = GET_CODE (addr);
        if (code == POST_INC || code == POST_DEC || code == PRE_INC
            || code == PRE_DEC)
          {
            asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
            inc_val = GET_MODE_SIZE (GET_MODE (x));
            if (code == POST_INC || code == POST_DEC)
              asm_fprintf (stream, "], #%s%d",(code == POST_INC)
                           ? "": "-", inc_val);
            else
              asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
                           ? "": "-", inc_val);
          }
        else if (code == POST_MODIFY || code == PRE_MODIFY)
          {
            asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
            postinc_reg = XEXP (XEXP (addr, 1), 1);
            if (postinc_reg && CONST_INT_P (postinc_reg))
              {
                if (code == POST_MODIFY)
                  asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
                else
                  asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
              }
          }
        else if (code == PLUS)
          {
            rtx base = XEXP (addr, 0);
            rtx index = XEXP (addr, 1);

            gcc_assert (REG_P (base) && CONST_INT_P (index));

            HOST_WIDE_INT offset = INTVAL (index);
            asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
          }
        else
          {
            gcc_assert (REG_P (addr));
            asm_fprintf (stream, "[%r]",REGNO (addr));
          }
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (CONST_DOUBLE_P (x));
      int result;
      result = vfp3_const_double_for_fract_bits (x);
      if (result == 0)
        result = vfp3_const_double_for_bits (x);
      fprintf (stream, "#%d", result);
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_address (GET_MODE (x), XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
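/* Worked example for the 'V' case above: for X == 0b1...1110001 the inverted
   low 32 bits are VAL == 0b0001110, so VAL & -VAL == 2 gives lsb == 1, and
   exact_log2 (VAL + (VAL & -VAL)) == exact_log2 (0b10000) == 4 gives the
   width 4 - 1 == 3 -- hence the "#1, #3" quoted in the comment.  */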
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (!REG_P (base)
              || (REG_P (index) && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  */
              /* (one of them must be).  */
              /* Also ensure the SP is not used as an index register.  */
              std::swap (base, index);
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              {
                asm_fprintf (stream, "[%r, %s%r",
                             REGNO (base), is_minus ? "-" : "",
                             REGNO (XEXP (index, 0)));
                arm_print_operand (stream, index, 'S');
                fputs ("]", stream);
                break;
              }

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          gcc_assert (REG_P (XEXP (x, 0)));

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
          else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
            asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
          else
            asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (REG_P (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (x, 1)))
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}
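/* Sample renderings of the 32-bit cases above (register numbers purely
   illustrative, SImode assumed for the auto-increment forms):

     (reg r3)                                     ->  [r3]
     (plus (reg r3) (const_int 8))                ->  [r3, #8]
     (plus (reg r3) (mult (reg r4) (const_int 4)))->  [r3, r4, lsl #2]
     (pre_dec (reg r3))                           ->  [r3, #-4]!
     (post_inc (reg r3))                          ->  [r3], #4  */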
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          /* References to weak symbols cannot be resolved locally:
             they may be overridden by a non-weak definition at link
             time.  */
          if (!arm_pic_data_is_text_relative
              || (SYMBOL_REF_P (x)
                  && (!SYMBOL_REF_LOCAL_P (x)
                      || (SYMBOL_REF_DECL (x)
                          ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
                      || (SYMBOL_REF_FUNCTION_P (x)
                          && !arm_fdpic_local_funcdesc_p (x)))))
            {
              if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
                fputs ("(GOTFUNCDESC)", asm_out_file);
              else
                fputs ("(GOT)", asm_out_file);
            }
          else
            {
              if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
                fputs ("(GOTOFFFUNCDESC)", asm_out_file);
              else
                {
                  bool is_readonly;

                  if (!TARGET_FDPIC
                      || arm_is_segment_info_known (x, &is_readonly))
                    fputs ("(GOTOFF)", asm_out_file);
                  else
                    fputs ("(GOT)", asm_out_file);
                }
            }
        }

      /* For FDPIC we also have to mark symbol for .data section.  */
      if (TARGET_FDPIC
          && !making_const_table
          && SYMBOL_REF_P (x)
          && SYMBOL_REF_FUNCTION_P (x))
        fputs ("(FUNCDESC)", asm_out_file);

      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_real
              (*CONST_DOUBLE_REAL_VALUE (elt),
               as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
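/* For a word-sized PIC reference the output above is a plain data word plus
   a relocation suffix, e.g. (symbol name illustrative):

        .word   foo(GOT)

   or, for a locally resolvable symbol addressed relative to the GOT base:

        .word   foo(GOTOFF)  */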
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
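/* As a concrete (illustrative) example of such an IT block, two COND_EXEC
   insns under EQ and its inverse NE can be merged into a single block:

        ite     eq
        moveq   r0, #1
        movne   r0, #0  */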
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case E_CC_NZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_NVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	default: return ARM_NV;
	}

    case E_CC_Bmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case E_CC_ADCmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CCmode:
    case E_CC_RSBmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
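/* As a worked example of the tables above: (ge (reg:CC cc) (const_int 0))
   in plain CCmode maps to ARM_GE, whereas the same GE comparison in
   CC_SWPmode (whose operands were swapped when the mode was selected)
   maps to ARM_LE.  */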
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  while (1)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.cc assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5t)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh dear!  We ran off the end, so give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.cc assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
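/* For example, a block of three conditional insns under an EQ predicate,
   the first two true and the third on the inverse condition, produces an
   IT block along these lines:

	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #1
	movne	r0, #0
*/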
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (IS_VPR_REGNUM (regno))
    return CEIL (GET_MODE_SIZE (mode), 2);

  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
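/* For example, DImode occupies ARM_NUM_REGS (DImode) == 2 core registers,
   whereas the special registers above PC_REGNUM (the condition-code and
   iWMMXt control registers, for instance) always count as a single
   register in 32-bit state.  */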
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_VFP_BASE
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (IS_VPR_REGNUM (regno))
    return VALID_MVE_PRED_MODE (mode);

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
    {
      if (mode == DFmode || mode == DImode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode || mode == BFmode || mode == HImode
	  || mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
      if (TARGET_HAVE_MVE)
	return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
		|| (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  The same restriction applies for MVE
     in order to support Armv8.1-M Mainline instructions.
     Do not allow very large Neon structure opaque modes in general
     registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
	return true;

      return !((TARGET_LDRD || TARGET_CDE)
	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  */
static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  if (TARGET_HAVE_MVE
      && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if ((TARGET_NEON
       && (VALID_NEON_DREG_MODE (mode1)
	   || VALID_NEON_QREG_MODE (mode1)
	   || VALID_NEON_STRUCT_MODE (mode1))
       && (VALID_NEON_DREG_MODE (mode2)
	   || VALID_NEON_QREG_MODE (mode2)
	   || VALID_NEON_STRUCT_MODE (mode2)))
      || (TARGET_HAVE_MVE
	  && (VALID_MVE_MODE (mode1)
	      || VALID_MVE_STRUCT_MODE (mode1))
	  && (VALID_MVE_MODE (mode2)
	      || VALID_MVE_STRUCT_MODE (mode2))))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (IS_VPR_REGNUM (regno))
    return VPR_REG;

  if (IS_PAC_REGNUM (regno))
    return PAC_REG;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type:
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* We can calculate either in 16-bit range and precision or
	 32-bit range and precision.  Make that decision based on whether
	 we have native support for the ARMv8.2-A 16-bit floating-point
	 instructions or not.  */
      return (TARGET_VFP_FP16INST
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	      : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FLOAT16:
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
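/* For example, under -mfp16-format=ieee the _Float16 type maps to HFmode,
   while under -mfp16-format=alternative only __fp16 remains usable:
   the alternative format trades infinities and NaNs for extra range and
   is therefore not an interchange format.  */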
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
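/* For example, a two-component copy whose destination starts at a higher,
   overlapping register number (say d1/d2 := d0/d1) is ordered
   highest-component-first, so d2 := d1 is emitted before d1 := d0 and no
   source register is clobbered before it has been read.  */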
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
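/* For example, combining d0 and d1 into q0 (which occupies d0/d1) needs
   no data movement at all, so only a deleted-insn note is emitted, while
   combining d1 and d0 into q0 is the reversed special case handled by a
   single VSWP.  */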
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
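/* For example, number_of_first_bit_set (0x18) is 3, since bit 3 is the
   lowest bit set in the mask 0b11000.  */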
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
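/* For example, when the prologue stages a high register through a low
   one, MASK names the low register that is physically pushed while
   REAL_REGS names the high register whose value the stack slot actually
   holds, so the unwinder records the register that was really saved.  */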
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
	  if (!TARGET_HAVE_FPCXT_CMSE)
	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
			 reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register for the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);

      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in cmse_nonsecure_entry_clear_before_return ().  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
	 by cmse_nonsecure_entry_clear_before_return ().  */
      if (!TARGET_HAVE_FPCXT_CMSE)
	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
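/* For example, an interworking return from a function whose return
   address is on the stack and which returns a word-sized value in r0
   would typically come out as:

	pop	{r1}
	bx	r1
*/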
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NZmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't much useful.

     The following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump && (func_size * 3) >= 2048)
    {
      /* Record the fact that we have decided that
	 the function does use far jumps.  */
      cfun->machine->far_jump_used = 1;
      return 1;
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		       next_hi_reg--)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;
  machine->pacspval_needed = 0;

  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
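/* For example, eliminating ARG_POINTER_REGNUM in favour of
   STACK_POINTER_REGNUM yields offsets->outgoing_args - offsets->saved_args,
   the full static frame size, so incoming-argument accesses become
   SP-relative addresses.  */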
27056 /* Generate the function's prologue. */
27059 thumb1_expand_prologue (void)
27063 HOST_WIDE_INT amount
;
27064 HOST_WIDE_INT size
;
27065 arm_stack_offsets
*offsets
;
27066 unsigned long func_type
;
27068 unsigned long live_regs_mask
;
27069 unsigned long l_mask
;
27070 unsigned high_regs_pushed
= 0;
27071 bool lr_needs_saving
;
27073 func_type
= arm_current_func_type ();
27075 /* Naked functions don't have prologues. */
27076 if (IS_NAKED (func_type
))
27078 if (flag_stack_usage_info
)
27079 current_function_static_stack_size
= 0;
27083 if (IS_INTERRUPT (func_type
))
27085 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27089 if (is_called_in_ARM_mode (current_function_decl
))
27090 emit_insn (gen_prologue_thumb1_interwork ());
27092 offsets
= arm_get_frame_offsets ();
27093 live_regs_mask
= offsets
->saved_regs_mask
;
27094 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
27096 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27097 l_mask
= live_regs_mask
& 0x40ff;
27098 /* Then count how many other high registers will need to be pushed. */
27099 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27101 if (crtl
->args
.pretend_args_size
)
27103 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27105 if (cfun
->machine
->uses_anonymous_args
)
27107 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27108 unsigned long mask
;
27110 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27111 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27113 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27117 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27118 stack_pointer_rtx
, x
));
27120 RTX_FRAME_RELATED_P (insn
) = 1;
27123 if (TARGET_BACKTRACE
)
27125 HOST_WIDE_INT offset
= 0;
27126 unsigned work_register
;
27127 rtx work_reg
, x
, arm_hfp_rtx
;
27129 /* We have been asked to create a stack backtrace structure.
27130 The code looks like this:
27134 0 sub SP, #16 Reserve space for 4 registers.
27135 2 push {R7} Push low registers.
27136 4 add R7, SP, #20 Get the stack pointer before the push.
27137 6 str R7, [SP, #8] Store the stack pointer
27138 (before reserving the space).
27139 8 mov R7, PC Get hold of the start of this code + 12.
27140 10 str R7, [SP, #16] Store it.
27141 12 mov R7, FP Get hold of the current frame pointer.
27142 14 str R7, [SP, #4] Store it.
27143 16 mov R7, LR Get hold of the current return address.
27144 18 str R7, [SP, #12] Store it.
27145 20 add R7, SP, #16 Point at the start of the
27146 backtrace structure.
27147 22 mov FP, R7 Put this value into the frame pointer. */
27149 work_register
= thumb_find_work_register (live_regs_mask
);
27150 work_reg
= gen_rtx_REG (SImode
, work_register
);
27151 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27153 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27154 stack_pointer_rtx
, GEN_INT (-16)));
27155 RTX_FRAME_RELATED_P (insn
) = 1;
27159 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27160 RTX_FRAME_RELATED_P (insn
) = 1;
27161 lr_needs_saving
= false;
27163 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27166 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27167 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27169 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27170 x
= gen_frame_mem (SImode
, x
);
27171 emit_move_insn (x
, work_reg
);
27173 /* Make sure that the instruction fetching the PC is in the right place
27174 to calculate "start of backtrace creation code + 12". */
27175 /* ??? The stores using the common WORK_REG ought to be enough to
27176 prevent the scheduler from doing anything weird. Failing that
27177 we could always move all of the following into an UNSPEC_VOLATILE. */
27180 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27181 emit_move_insn (work_reg
, x
);
27183 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27184 x
= gen_frame_mem (SImode
, x
);
27185 emit_move_insn (x
, work_reg
);
27187 emit_move_insn (work_reg
, arm_hfp_rtx
);
27189 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27190 x
= gen_frame_mem (SImode
, x
);
27191 emit_move_insn (x
, work_reg
);
27195 emit_move_insn (work_reg
, arm_hfp_rtx
);
27197 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27198 x
= gen_frame_mem (SImode
, x
);
27199 emit_move_insn (x
, work_reg
);
27201 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27202 emit_move_insn (work_reg
, x
);
27204 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27205 x
= gen_frame_mem (SImode
, x
);
27206 emit_move_insn (x
, work_reg
);
27209 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27210 emit_move_insn (work_reg
, x
);
27212 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27213 x
= gen_frame_mem (SImode
, x
);
27214 emit_move_insn (x
, work_reg
);
27216 x
= GEN_INT (offset
+ 12);
27217 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27219 emit_move_insn (arm_hfp_rtx
, work_reg
);
27221 /* Optimization: If we are not pushing any low registers but we are going
27222 to push some high registers then delay our first push. This will just
27223 be a push of LR and we can combine it with the push of the first high
27225 else if ((l_mask
& 0xff) != 0
27226 || (high_regs_pushed
== 0 && lr_needs_saving
))
27228 unsigned long mask
= l_mask
;
27229 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27230 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27231 RTX_FRAME_RELATED_P (insn
) = 1;
27232 lr_needs_saving
= false;
  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers.  Such kind of stash may clobber the
	 use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
	 saved; but if the function examines its own return address then
	 the value is still live and we need to avoid using it.  */
      bool return_addr_live
	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			   LR_REGNUM);
      if (lr_needs_saving || return_addr_live)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    if (pushable_regs & (1 << regno))
	      {
		emit_move_insn (gen_rtx_REG (SImode, regno),
				gen_rtx_REG (SImode, next_hi_reg));

		high_regs_pushed--;
		real_regs_mask |= (1 << next_hi_reg);
		push_mask |= (1 << regno);

		if (high_regs_pushed)
		  {
		    for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			 next_hi_reg--)
		      if (live_regs_mask & (1 << next_hi_reg))
			break;
		  }
		else
		  break;
	      }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	      /* If the return address is not live at this point, we
		 can add LR to the list of registers that we can use
		 for pushes.  */
	      if (!return_addr_live)
		pushable_regs |= 1 << LR_REGNUM;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
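
  /* For instance, if r8 and r9 are live high registers and only r4 is in
     PUSHABLE_REGS, the loop above emits "mov r4, r8; push {r4}" and then
     "mov r4, r9; push {r4}", with PUSH_MASK naming the low register
     actually pushed and REAL_REGS_MASK naming the high register whose
     value it carries, so the unwind info describes r8/r9 rather than
     r4.  */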
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask, NULL_RTX);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("%<-fstack-check=specific%> for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

static void
cmse_nonsecure_entry_clear_before_return (void)
{
  bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
  int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (clear_vfpregs)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      if (!TARGET_HAVE_FPCXT_CMSE)
	{
	  /* Make sure we don't clear the two scratch registers used to clear
	     the relevant FPSCR bits in output_return_instruction.  */
	  emit_use (gen_rtx_REG (SImode, IP_REGNUM));
	  bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
	  emit_use (gen_rtx_REG (SImode, 4));
	  bitmap_clear_bit (to_clear_bitmap, 4);
	}
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (!callee_saved_reg_p (regno)
	  && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
	      || TARGET_HARD_FLOAT))
	bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
	= compute_not_to_clear_mask (result_type, result_rtl, 0,
				     &padding_bits_to_clear);
      if (to_clear_return_mask)
	{
	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    if (to_clear_return_mask & (1ULL << regno))
	      bitmap_clear_bit (to_clear_bitmap, regno);
	}
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
			clearing_reg);
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (arm_current_function_pac_enabled_p ())
	{
	  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
	  arm_emit_multi_reg_pop (saved_regs_mask);
	  emit_insn (gen_aut_nop ());
	  emit_jump_insn (simple_return_rtx);
	}
      else if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
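
/* In the single-register case above, the PARALLEL of RET_RTX with a
   POST_INC load of the PC is what matches the
   *pop_multiple_with_stack_update_and_return pattern; it typically
   assembles to a single "pop {pc}".  */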
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (reg_needs_saving_p (regno))
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_VFP_BASE)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (reg_needs_saving_p (i))
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_VFP_BASE)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (reg_needs_saving_p (i))
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return
	  && !arm_current_function_pac_enabled_p ())
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp =
	emit_insn (gen_addsi3 (stack_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer arm_expand_prologue on how to save
	     pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (IS_CMSE_ENTRY (func_type))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      /* Clear all caller-saved regs that are not used to return.  */
      cmse_nonsecure_entry_clear_before_return ();

      /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
	 VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE)
	{
	  rtx_insn *insn;

	  insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
						   GEN_INT (FPCXTNS_ENUM)));
	  rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx, 4));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (arm_current_function_pac_enabled_p ())
    emit_insn (gen_aut_nop ());

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
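
/* The ordering above matters: when the address register is also the low
   destination (e.g. a double load into r0/r1 from [r0]), loading the low
   word first would clobber the address before the high word was fetched,
   so in that case the high word is loaded first.  */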
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
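
/* The chains of compare-and-swap above are a small sorting network: they
   put the scratch registers into ascending order, which the register
   lists of ldmia/stmia require.  */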
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_cpymemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_cpymem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_cpymem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.cc needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
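
/* As an illustration, with -fverbose-asm a call such as
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) produces a line of
   the form (the comment marker depends on ASM_COMMENT_START):

	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args  */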
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
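
/* With -mprint-tune-info the resulting assembly preamble looks like
   (values vary with the selected tuning):

	@ .tune parameters
	@ constant_limit:	2
	@ max_insns_skipped:	6
	@ ...  */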
/* The last set of target options used to emit .arch directives, etc.  This
   could be a function-local static if it were not required to expose it as a
   root to the garbage collector.  */
static GTY(()) cl_target_option *last_asm_targ_options = NULL;

/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
{
  arm_build_target build_target;
  /* If the target options haven't changed since the last time we were called
     there is nothing to do.  This should be sufficient to suppress the
     majority of redundant work.  */
  if (last_asm_targ_options == targ_options)
    return;

  last_asm_targ_options = targ_options;

  build_target.isa = sbitmap_alloc (isa_num_bits);
  arm_configure_build_target (&build_target, targ_options, false);

  if (build_target.core_name
      && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
    {
      const char* truncated_name
	= arm_rewrite_selected_cpu (build_target.core_name);
      asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
    }

  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  build_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  if (strcmp (build_target.arch_name, "armv7ve") == 0)
    {
      /* Keep backward compatibility for assemblers which don't support
	 armv7ve.  Fortunately, none of the following extensions are reset
	 by a .fpu directive.  */
      asm_fprintf (stream, "\t.arch armv7-a\n");
      asm_fprintf (stream, "\t.arch_extension virt\n");
      asm_fprintf (stream, "\t.arch_extension idiv\n");
      asm_fprintf (stream, "\t.arch_extension sec\n");
      asm_fprintf (stream, "\t.arch_extension mp\n");
    }
  else
    asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);

  /* The .fpu directive will reset any architecture extensions from the
     assembler that relate to the fp/vector extensions.  So put this out before
     any .arch_extension directives.  */
  const char *fpu_name = (TARGET_SOFT_FLOAT
			  ? "softvfp"
			  : arm_identify_fpu_from_isa (build_target.isa));
  asm_fprintf (stream, "\t.fpu %s\n", fpu_name);

  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
	     "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
	     floating point instructions are disabled.  So the following check
	     restricts the printing of ".arch_extension mve" and
	     ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
	     this special behaviour because the feature bits "mve" and
	     "mve_float" are not part of "fpu bits", so they are not cleared
	     when -mfloat-abi=soft (i.e. nofp) but the macros TARGET_HAVE_MVE
	     and TARGET_HAVE_MVE_FLOAT are disabled.  */
	  if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
	      || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
		  && !TARGET_HAVE_MVE_FLOAT))
	    continue;

	  /* If every feature bit of this option is set in the target ISA
	     specification, print out the option name.  However, don't print
	     anything if all the bits are part of the FPU specification.  */
	  if (bitmap_subset_p (opt_bits, build_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
	    asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
	}
    }
}
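
/* For example, for an Armv7-A configuration with NEON this prints
   something like:

	.arch armv7-a
	.fpu neon

   (plus a .cpu line when a core name is known), followed by one
   .arch_extension line per extension enabled beyond what the base
   architecture and FPU already imply.  */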
static void
arm_file_start (void)
{
  int val;
  bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
  bool bti = (aarch_enable_bti == 1);

  arm_print_asm_arch_directives
    (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));

  if (TARGET_BPABI)
    {
      /* If we have a named cpu, but the assembler does not support that
	 name via .cpu, put out a cpu name attribute; but don't do this if the
	 name starts with the fictitious prefix, 'generic'.  */
      if (arm_active_target.core_name
	  && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
	  && !startswith (arm_active_target.core_name, "generic"))
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
	    asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
			 truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

      if (TARGET_HARD_FLOAT_ABI)
	arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (TARGET_HAVE_PACBTI)
	{
	  arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
	  arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
	}
      else if (pac || bti)
	{
	  arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
	  arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
	}

      if (bti)
	arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
      if (pac)
	arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
    }

  if (arm_lang_output_object_attributes_hook)
    arm_lang_output_object_attributes_hook();

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  /* Just in case the last function output in the assembler had non-default
     architecture directives, we force the assembler state back to the default
     set, so that any 'calculated' build attributes are based on the default
     options rather than the special options for that function.  */
  arm_print_asm_arch_directives
    (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Define classes to generate code as RTL or output asm to a file.
   Using templates then allows to use the same code to output code
   sequences in the two formats.  */
class thumb1_const_rtl
{
 public:
  thumb1_const_rtl (rtx dst) : dst (dst) {}

  void mov (HOST_WIDE_INT val)
  {
    emit_set_insn (dst, GEN_INT (val));
  }

  void add (HOST_WIDE_INT val)
  {
    emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
  }

  void ashift (HOST_WIDE_INT shift)
  {
    emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
  }

  void neg ()
  {
    emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
  }

 private:
  rtx dst;
};

class thumb1_const_print
{
 public:
  thumb1_const_print (FILE *f, int regno)
  {
    t_file = f;
    dst_regname = reg_names[regno];
  }

  void mov (HOST_WIDE_INT val)
  {
    asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
		 dst_regname, val);
  }

  void add (HOST_WIDE_INT val)
  {
    asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
		 dst_regname, val);
  }

  void ashift (HOST_WIDE_INT shift)
  {
    asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
		 dst_regname, shift);
  }

  void neg ()
  {
    asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
  }

 private:
  FILE *t_file;
  const char *dst_regname;
};
/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
   Avoid generating useless code when one of the bytes is zero.  */
template <class T>
static void
thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
{
  bool mov_done_p = false;
  unsigned HOST_WIDE_INT val = op1;
  int shift = 0;
  int i;

  gcc_assert (op1 == trunc_int_for_mode (op1, SImode));

  if (val <= 255)
    {
      dst.mov (val);
      return;
    }

  /* For negative numbers with the first nine bits set, build the
     opposite of OP1, then negate it, it's generally shorter and not
     longer.  */
  if ((val & 0xFF800000) == 0xFF800000)
    {
      thumb1_gen_const_int_1 (dst, -op1);
      dst.neg ();
      return;
    }

  /* In the general case, we need 7 instructions to build
     a 32 bits constant (1 movs, 3 lsls, 3 adds).  We can
     do better if VAL is small enough, or
     right-shiftable by a suitable amount.  If the
     right-shift enables to encode at least one less byte,
     it's worth it: we save a adds and a lsls at the
     expense of a final lsls.  */
  int final_shift = number_of_first_bit_set (val);

  int leading_zeroes = clz_hwi (val);
  int number_of_bytes_needed
    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
       / BITS_PER_UNIT) + 1;
  int number_of_bytes_needed2
    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
       / BITS_PER_UNIT) + 1;

  if (number_of_bytes_needed2 < number_of_bytes_needed)
    val >>= final_shift;
  else
    final_shift = 0;

  /* If we are in a very small range, we can use either a single movs
     or movs+adds.  */
  if (val <= 510)
    {
      if (val > 255)
	{
	  unsigned HOST_WIDE_INT high = val - 255;

	  dst.mov (high);
	  dst.add (255);
	}
      else
	dst.mov (val);

      if (final_shift > 0)
	dst.ashift (final_shift);
    }
  else
    {
      /* General case, emit upper 3 bytes as needed.  */
      for (i = 0; i < 3; i++)
	{
	  unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;

	  if (byte)
	    {
	      /* We are about to emit new bits, stop accumulating a
		 shift amount, and left-shift only if we have already
		 emitted some upper bits.  */
	      if (mov_done_p)
		{
		  dst.ashift (shift);
		  dst.add (byte);
		}
	      else
		dst.mov (byte);

	      /* Stop accumulating shift amount since we've just
		 emitted some bits.  */
	      shift = 0;

	      mov_done_p = true;
	    }

	  if (mov_done_p)
	    shift += 8;
	}

      /* Emit lower byte.  */
      if (!mov_done_p)
	dst.mov (val & 0xff);
      else
	{
	  dst.ashift (shift);
	  if (val & 0xff)
	    dst.add (val & 0xff);
	}

      if (final_shift > 0)
	dst.ashift (final_shift);
    }
}
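
/* Worked example of the algorithm above: for OP1 == 0x1234 the printing
   variant emits

	movs	rN, #18
	lsls	rN, #8
	adds	rN, #52

   i.e. build 0x12, shift it up one byte, then add in 0x34.  */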
/* Proxies for thumb1.md, since the thumb1_const_print and
   thumb1_const_rtl classes are not exported.  */
void
thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
{
  thumb1_const_rtl t (dst);
  thumb1_gen_const_int_1 (t, op1);
}

void
thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
{
  thumb1_const_print t (asm_out_file, REGNO (dst));
  thumb1_gen_const_int_1 (t, op1);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);

	  /* With -mpure-code, we cannot load the address from the
	     constant pool: we build it explicitly.  */
	  if (target_pure_code)
	    {
	      fputs ("\tmovs\tr3, #:upper8_15:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	      fputs ("\tlsls r3, #8\n", file);
	      fputs ("\tadds\tr3, #:upper0_7:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	      fputs ("\tlsls r3, #8\n", file);
	      fputs ("\tadds\tr3, #:lower8_15:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	      fputs ("\tlsls r3, #8\n", file);
	      fputs ("\tadds\tr3, #:lower0_7:#", file);
	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	      fputc ('\n', file);
	    }
	  else
	    fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);

      if (!target_pure_code)
	{
	  assemble_name (file, label);
	  fputc ('\n', file);
	}

      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  /* With -mpure-code, we cannot load MI_DELTA from the
	     constant pool: we build it explicitly.  */
	  if (target_pure_code)
	    {
	      thumb1_const_print r3 (file, 3);
	      thumb1_gen_const_int_1 (r3, mi_delta);
	    }
	  else
	    {
	      fputs ("\tldr\tr3, ", file);
	      assemble_name (file, label);
	      fputs ("+4\n", file);
	    }
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");

      /* With -mpure-code, we don't need to emit literals for the
	 function address and delta since we emitted code to build
	 them.  */
      if (!target_pure_code)
	{
	  ASM_OUTPUT_ALIGN (file, 2);
	  assemble_name (file, label);
	  fputs (":\n", file);
	  if (flag_pic)
	    {
	      /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	      rtx tem = XEXP (DECL_RTL (function), 0);
	      /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
		 pipeline offset is four rather than eight.  Adjust the offset
		 accordingly.  */
	      tem = plus_constant (GET_MODE (tem), tem,
				   TARGET_THUMB1_ONLY ? -3 : -7);
	      tem = gen_rtx_MINUS (GET_MODE (tem),
				   tem,
				   gen_rtx_SYMBOL_REF (Pmode,
						       ggc_strdup (labelpc)));
	      assemble_integer (tem, 4, BITS_PER_WORD, 1);
	    }
	  else
	    /* Output ".word .LTHUNKn".  */
	    assemble_integer (XEXP (DECL_RTL (function), 0), 4,
			      BITS_PER_WORD, 1);

	  if (TARGET_THUMB1_ONLY && mi_delta > 255)
	    assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
	}
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  const bool long_call_p = arm_is_long_call_p (function);

  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  if (long_call_p)
    {
      emit_move_insn (temp, funexp);
      funexp = temp;
    }
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Indirect calls require a bit of fixup in PIC mode.  */
  if (long_call_p)
    split_all_insns_noflow ();

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));

  assemble_start_function (thunk, fnname);
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
  assemble_end_function (thunk, fnname);
}
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:	     gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
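
/* As an illustration (element values assumed purely for exposition): a
   V4HImode CONST_VECTOR holding the elements 1, 2, 3 and 4 would be
   printed as

       0x0004000300020001

   since the loop above walks the elements from the highest lane down and
   formats each one with the "%04x" pattern selected for V4HImode.  */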
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
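
/* A sketch of the resulting assembly (little-endian layout and exact
   directive spellings assumed for illustration): the HFmode constant 1.0,
   whose bit pattern is 0x3c00, would come out roughly as

       .short 0x3c00
       .zero  2

   with the two halves swapped under WORDS_BIG_ENDIAN, so the half-float
   always occupies a full 4-byte word for the ldr mentioned above.  */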
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    const function_arg_info &arg,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
	  && (nregs & 1))
	{
	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", arg.type);
	  else if (res > 0)
	    {
	      nregs++;
	      if (res > 1 && warn_psabi)
		inform (input_location,
			"parameter passing for argument of type "
			"%qT changed in GCC 9.1", arg.type);
	    }
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}

/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */

bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
		      || mode == V4HFmode || mode == V16QImode
		      || mode == V4SFmode || mode == V2DImode
		      || mode == V8HFmode || mode == V4BFmode
		      || mode == V8BFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
			  || mode == V2UHQmode || mode == V2HQmode
			  || mode == V2UHAmode || mode == V2HAmode))
    return true;

  if (TARGET_HAVE_MVE
      && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
    return true;

  if (TARGET_HAVE_MVE_FLOAT
      && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
      && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_HFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;
      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;
      default:;
      }

  if (TARGET_HAVE_MVE)
    switch (mode)
      {
      case E_QImode:
	return V16QImode;
      case E_HImode:
	return V8HImode;
      case E_SImode:
	return V4SImode;
      default:;
      }

  if (TARGET_HAVE_MVE_FLOAT)
    switch (mode)
      {
      case E_HFmode:
	return V8HFmode;
      case E_SFmode:
	return V4SFmode;
      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return default_class_likely_spilled_p (rclass);
}

/* Implements target hook small_register_classes_for_mode_p.  */

static bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
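
/* For example (a sketch of the middle-end consequence, not target code):
   with the mask of 255 returned for SImode, a source-level

       x << (n & 0xff)

   can be simplified to a plain register-shifted operation, because the
   hardware shift already reduces the count modulo 256; the zero returned
   for DImode makes no such promise.  */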
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_debugger_regno (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  if (IS_PAC_REGNUM (regno))
    return DWARF_PAC_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
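
/* A minimal worked example of the mapping (register numbering assumed from
   the usual ARM layout, for illustration): s0, the first VFP register,
   maps to DWARF register 64 and s1 to 65, while a double-only register
   such as d16 (32 single registers above FIRST_VFP_REGNUM) maps into the
   256-based D-register range as 256 + 32 / 2 = 272.  */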
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
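
/* For instance (a sketch, little-endian lane order assumed): a DFmode
   value living in d0, which GCC models as the register pair s0/s1, is
   described to the DWARF code as

       (parallel [(reg:SI s0) (reg:SI s1)])

   with the two SImode halves swapped under TARGET_BIG_END, matching the
   loop above.  */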
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16 || IS_PAC_REGNUM (reg))
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else if (IS_PAC_REGNUM (reg))
	asm_fprintf (asm_out_file, "ra_auth_code");
      else
	asm_fprintf (out_file, "%r", reg);

      if (flag_checking)
	{
	  /* Check that the addresses are consecutive.  */
	  e = XEXP (SET_DEST (e), 0);
	  if (GET_CODE (e) == PLUS)
	    gcc_assert (REG_P (XEXP (e, 0))
			&& REGNO (XEXP (e, 0)) == SP_REGNUM
			&& CONST_INT_P (XEXP (e, 1))
			&& offset == INTVAL (XEXP (e, 1)));
	  else
	    gcc_assert (i == 1
			&& REG_P (e)
			&& REGNO (e) == SP_REGNUM);
	  offset += reg_size;
	}
    }

  fprintf (out_file, "}\n");
  if (padfirst)
    fprintf (out_file, "\t.pad #%d\n", padfirst);
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
	{
	  if (cfun->machine->pacspval_needed)
	    asm_fprintf (out_file, "\t.pacspval\n");
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx
			|| IS_PAC_REGNUM (REGNO (src)));
	    reg = REGNO (dest);

	    if (IS_PAC_REGNUM (REGNO (src)))
	      arm_unwind_emit_set (out_file, PATTERN (insn));
	    else
	      asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			   reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   the stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (out_file, pat);
      break;

    default:
      break;
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return true;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
30157 #endif /* ARM_UNWIND_INFO */
30159 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30162 arm_asm_init_sections (void)
30164 #if ARM_UNWIND_INFO
30165 exception_section
= get_unnamed_section (0, output_section_asm_op
,
30167 #endif /* ARM_UNWIND_INFO */
30169 #ifdef OBJECT_FORMAT_ELF
30170 if (target_pure_code
)
30171 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_GD32_FDPIC:
      fputs ("(tlsgd_fdpic)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDM32_FDPIC:
      fputs ("(tlsldm_fdpic)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_IE32_FDPIC:
      fputs ("(gottpoff_fdpic)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return true;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return true;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return true;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return true;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return false;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op (operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT (val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands,
				   bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
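
/* A worked example of the splitting above (mnemonic assumed purely for
   illustration): a DImode shift by 40 cannot be encoded in one
   instruction, so it is emitted as two, e.g.

       wsrld  wR0, wR1, #32
       wsrld  wR0, wR0, #8

   that is, #32 followed by #(shift - 32).  */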
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);

  switch (GET_MODE (operands[0]))
    {
    case E_V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case E_V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case E_V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (templ, operands);

  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
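
/* A sketch of the dispatch this emits for a QImode table (labels and
   register assignment assumed for illustration):

       cmp   r0, #NCASES-1
       bhi   .Ldefault
       tbb   [pc, r0]

   where the tbb consumes the ADDR_DIFF_VEC that immediately follows it.  */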
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
static int
arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
  if (DEBUG_INSN_P (insn))
    return more;

  rtx_code code = GET_CODE (PATTERN (insn));
  if (code == USE || code == CLOBBER)
    return more;

  if (get_attr_type (insn) == TYPE_NO_INSN)
    return more;

  return more - 1;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision floating point types.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    {
      if (TYPE_MAIN_VARIANT (type) == float16_type_node)
	return NULL;
      if (TYPE_MODE (type) == BFmode)
	return "u6__bf16";
      else
	return "Dh";
    }

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
   All modes except THUMB1 have conditional execution.
   If we have conditional arithmetic, return false before reload to
   enable some ifcvt transformations.  */
static bool
arm_have_conditional_execution (void)
{
  bool has_cond_exec, enable_ifcvt_trans;

  /* Only THUMB1 cannot support conditional execution.  */
  has_cond_exec = !TARGET_THUMB1;

  /* Enable ifcvt transformations if we have conditional arithmetic, but only
     before reload.  */
  enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;

  return has_cond_exec && !enable_ifcvt_trans;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}
static unsigned int
arm_autovectorize_vector_modes (vector_modes *modes, bool)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V8QImode);
    }
  return 0;
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_VFP_BASE)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
      if (TARGET_HAVE_MVE)
	fixed_regs[VPR_REGNUM] = 0;
    }

  if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }

  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  /* The Q and GE bits are only accessed via special ACLE patterns.  */
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
  CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass, we prefer
     LO_REGS, and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the
     register list is 8-bit.  Normally this means all registers in the list
     must be LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must
     use 32-bit encodings.  There is one exception for PUSH: LR in HI_REGS
     can be used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  return hi_reg ? 4 : 2;
}
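
/* Worked examples of the rule above (encodings assumed from the Thumb-2
   ISA, for illustration): "push {r0-r7}" and "push {r4, lr}" both fit the
   16-bit encoding, so the length is 2; "push {r4, r8}" drags in a high
   register other than LR and needs the 32-bit form, so the length is 4.  */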
/* Compute the attribute "length" of an insn.  Currently, this function is
   used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel
   PARALLEL rtx, RETURN_PC is true if OPERANDS contains a return insn.
   WRITE_BACK_P is true if OPERANDS contains an insn which explicitly
   updates the base register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is
     SP and if it's with write back, then an LDM will be an alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
/* Same as above, but operands are a register/memory pair in SImode.
   Assumes operands has the base register in position 0 and memory in position
   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  int count;
  rtx ops[2];
  int regnum, memnum;
  if (load)
    regnum = 0, memnum = 1;
  else
    regnum = 1, memnum = 0;
  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
  ops[memnum] = adjust_address (operands[2], DImode, 0);
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ((value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}
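
/* For example (a sketch of the intent): for the constant 0.25 the exact
   inverse is 4.0, which truncates exactly and is a power of two, so the
   function returns log2 (4) = 2 -- i.e. 0.25 can be treated as a
   fixed-point value with 2 fraction bits by the vcvt patterns.  */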
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
31088 arm_expand_compare_and_swap (rtx operands
[])
31090 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
31091 machine_mode mode
, cmp_mode
;
31093 bval
= operands
[0];
31094 rval
= operands
[1];
31096 oldval
= operands
[3];
31097 newval
= operands
[4];
31098 is_weak
= operands
[5];
31099 mod_s
= operands
[6];
31100 mod_f
= operands
[7];
31101 mode
= GET_MODE (mem
);
31103 /* Normally the succ memory model must be stronger than fail, but in the
31104 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31105 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31107 if (TARGET_HAVE_LDACQ
31108 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
31109 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
31110 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
31116 /* For narrow modes, we're going to perform the comparison in SImode,
31117 so do the zero-extension now. */
31118 rval
= gen_reg_rtx (SImode
);
31119 oldval
= convert_modes (SImode
, mode
, oldval
, true);
31123 /* Force the value into a register if needed. We waited until after
31124 the zero-extension above to do this properly. */
31125 if (!arm_add_operand (oldval
, SImode
))
31126 oldval
= force_reg (SImode
, oldval
);
31130 if (!cmpdi_operand (oldval
, mode
))
31131 oldval
= force_reg (mode
, oldval
);
31135 gcc_unreachable ();
31139 cmp_mode
= E_SImode
;
31141 cmp_mode
= CC_Zmode
;
31143 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
31144 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode
, mode
, bdst
, rval
, mem
,
31145 oldval
, newval
, is_weak
, mod_s
, mod_f
));
31147 if (mode
== QImode
|| mode
== HImode
)
31148 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
31150 /* In all cases, we arrange for success to be signaled by Z set.
31151 This arrangement allows for the boolean result to be used directly
31152 in a subsequent branch, post optimization. For Thumb-1 targets, the
31153 boolean negation of the result is also stored in bval because Thumb-1
31154 backend lacks dependency tracking for CC flag due to flag-setting not
31155 being represented at RTL level. */
31157 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
31160 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
31161 emit_insn (gen_rtx_SET (bval
, x
));
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32-bit targets and by neg_bval being zero
   for Thumb-1 targets (i.e. negation of the boolean value returned by
   the atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s_rtx = operands[6];
  mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.
     Instead, a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	{
	  rtx src = rval;
	  if (!satisfies_constraint_L (oldval))
	    {
	      gcc_assert (satisfies_constraint_J (oldval));

	      /* For such immediates, ADDS needs the source and destination
		 regs to be the same.

		 Normally this would be handled by RA, but this is all
		 happening after RA.  */
	      emit_move_insn (neg_bval, rval);
	      src = neg_bval;
	    }

	  emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
						       label2, cond));
	}
      else
	{
	  emit_move_insn (neg_bval, const1_rtx);
	  emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval,
						   label2));
	}
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
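
/* The split for a strong SImode compare-and-swap therefore produces a
   retry loop of roughly this shape (register assignment assumed, for
   illustration only):

       1:  ldrex   r0, [r3]        @ rval = *mem
	   cmp     r0, r1          @ rval == oldval ?
	   bne     2f              @ fail: Z clear
	   strex   r2, r4, [r3]    @ try to store newval
	   cmp     r2, #0
	   bne     1b              @ monitor lost: retry
       2:

   with ldaex/stlex substituted when acquire/release semantics are
   required.  */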
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = gen_int_mode (-INTVAL (value), wmode);
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
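
/* For example (illustrative only), an SImode atomic add splits into the
   usual load-exclusive/store-exclusive retry loop:

       1:  ldrex   r0, [r2]        @ old_out = *mem
	   add     r1, r0, r3      @ new_out = old_out + value
	   strex   ip, r1, [r2]    @ ip = 0 iff the store succeeded
	   cmp     ip, #0
	   bne     1b

   bracketed by whatever barriers the memory model selected above.  */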
/* Return the mode for the MVE vector of predicates corresponding to MODE.  */
opt_machine_mode
arm_mode_to_pred_mode (machine_mode mode)
{
  switch (GET_MODE_NUNITS (mode))
    {
    case 16: return V16BImode;
    case 8:  return V8BImode;
    case 4:  return V4BImode;
    case 2:  return V2QImode;
    }
  return opt_machine_mode ();
}
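
/* So, as an example, a V4SImode comparison yields a V4BImode predicate and
   a V16QImode one yields V16BImode; the two-lane case maps to V2QImode
   rather than a two-lane BImode vector, which the port does not provide.  */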
31430 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31431 If CAN_INVERT, store either the result or its inverse in TARGET
31432 and return true if TARGET contains the inverse. If !CAN_INVERT,
31433 always store the result in TARGET, never its inverse.
31435 Note that the handling of floating-point comparisons is not
31439 arm_expand_vector_compare (rtx target
, rtx_code code
, rtx op0
, rtx op1
,
31442 machine_mode cmp_result_mode
= GET_MODE (target
);
31443 machine_mode cmp_mode
= GET_MODE (op0
);
31447 /* MVE supports more comparisons than Neon. */
31448 if (TARGET_HAVE_MVE
)
31453 /* For these we need to compute the inverse of the requested
31462 code
= reverse_condition_maybe_unordered (code
);
31465 /* Recursively emit the inverted comparison into a temporary
31466 and then store its inverse in TARGET. This avoids reusing
31467 TARGET (which for integer NE could be one of the inputs). */
31468 rtx tmp
= gen_reg_rtx (cmp_result_mode
);
31469 if (arm_expand_vector_compare (tmp
, code
, op0
, op1
, true))
31470 gcc_unreachable ();
31471 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (cmp_result_mode
, tmp
)));
31484     /* These are natively supported by Neon for zero comparisons, but otherwise
31485        require the operands to be swapped.  For MVE, we can only compare
31489       if (!TARGET_HAVE_MVE)
31490         if (op1 != CONST0_RTX (cmp_mode))
31492             code = swap_condition (code);
31493             std::swap (op0, op1);
31495       /* Fall through.  */
31497     /* These are natively supported by Neon for both register and zero
31498        operands.  MVE supports registers only.  */
31503       if (TARGET_HAVE_MVE)
31505           switch (GET_MODE_CLASS (cmp_mode))
31507             case MODE_VECTOR_INT:
31508               emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31509                                         op0, force_reg (cmp_mode, op1)));
31511             case MODE_VECTOR_FLOAT:
31512               if (TARGET_HAVE_MVE_FLOAT)
31513                 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31514                                             op0, force_reg (cmp_mode, op1)));
31516                 gcc_unreachable ();
31519               gcc_unreachable ();
31523         emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31526     /* These are natively supported for register operands only.
31527        Comparisons with zero aren't useful and should be folded
31528        or canonicalized by target-independent code.  */
31531       if (TARGET_HAVE_MVE)
31532         emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31533                                   op0, force_reg (cmp_mode, op1)));
31535         emit_insn (gen_neon_vc (code, cmp_mode, target,
31536                                 op0, force_reg (cmp_mode, op1)));
31539     /* These require the operands to be swapped and likewise do not
31540        support comparisons with zero.  */
31543       if (TARGET_HAVE_MVE)
31544         emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31545                                   force_reg (cmp_mode, op1), op0));
31547         emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31548                                 target, force_reg (cmp_mode, op1), op0));
31551     /* These need a combination of two comparisons.  */
31555         /* Operands are LTGT iff (a > b || a < b).
31556            Operands are ORDERED iff (a > b || a <= b).  */
31557         rtx gt_res = gen_reg_rtx (cmp_result_mode);
31558         rtx alt_res = gen_reg_rtx (cmp_result_mode);
31559         rtx_code alt_code = (code == LTGT ? LT : LE);
31560         if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31561             || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31562           gcc_unreachable ();
31563         emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31564                                                      gt_res, alt_res)));
31569   gcc_unreachable ();
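/* Worked example: LTGT (a, b) is built as GT (a, b) | LT (a, b), and
   ORDERED (a, b) as GT (a, b) | LE (a, b); a lane is ordered exactly
   when one of a > b or a <= b holds.  */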
31573 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31574    CMP_RESULT_MODE is the mode of the comparison result.  */
31577 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31579   /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31580      arm_expand_vector_compare, and another one here.  */
31583   if (TARGET_HAVE_MVE)
31584     mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31586     mask = gen_reg_rtx (cmp_result_mode);
31588   bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31589                                              operands[4], operands[5], true);
31591     std::swap (operands[1], operands[2]);
31593     emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31594                               mask, operands[1], operands[2]));
31597       machine_mode cmp_mode = GET_MODE (operands[0]);
31599       switch (GET_MODE_CLASS (cmp_mode))
31601         case MODE_VECTOR_INT:
31602           emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31603                                      operands[1], operands[2], mask));
31605         case MODE_VECTOR_FLOAT:
31606           if (TARGET_HAVE_MVE_FLOAT)
31607             emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31608                                          operands[1], operands[2], mask));
31610             gcc_unreachable ();
31613           gcc_unreachable ();
31618 #define MAX_VECT_LEN 16
31620 struct expand_vec_perm_d
31622   rtx target, op0, op1;
31623   vec_perm_indices perm;
31624   machine_mode vmode;
31629 /* Generate a variable permutation.  */
31632 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31634   machine_mode vmode = GET_MODE (target);
31635   bool one_vector_p = rtx_equal_p (op0, op1);
31637   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31638   gcc_checking_assert (GET_MODE (op0) == vmode);
31639   gcc_checking_assert (GET_MODE (op1) == vmode);
31640   gcc_checking_assert (GET_MODE (sel) == vmode);
31641   gcc_checking_assert (TARGET_NEON);
31645       if (vmode == V8QImode)
31646         emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31648         emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31654       if (vmode == V8QImode)
31656           pair = gen_reg_rtx (V16QImode);
31657           emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31658           pair = gen_lowpart (TImode, pair);
31659           emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31663           pair = gen_reg_rtx (OImode);
31664           emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31665           emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31671 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31673   machine_mode vmode = GET_MODE (target);
31674   unsigned int nelt = GET_MODE_NUNITS (vmode);
31675   bool one_vector_p = rtx_equal_p (op0, op1);
31678   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
31679      numbering of elements for big-endian, we must reverse the order.  */
31680   gcc_checking_assert (!BYTES_BIG_ENDIAN);
31682   /* The VTBL instruction does not use a modulo index, so we must take care
31683      of that ourselves.  */
31684   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31685   mask = gen_const_vec_duplicate (vmode, mask);
31686   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31688   arm_expand_vec_perm_1 (target, op0, op1, sel);
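/* Example of the masking above: for a two-operand V8QImode permute,
   nelt is 8, so each selector byte is ANDed with 15; a selector value
   of 19 therefore behaves like 3, i.e. it picks lane 3 of op0.  */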
31691 /* Map lane ordering between architectural lane order, and GCC lane order,
31692    taking into account ABI.  See comment above output_move_neon for details.  */
31695 neon_endian_lane_map (machine_mode mode, int lane)
31697   if (BYTES_BIG_ENDIAN)
31699       int nelems = GET_MODE_NUNITS (mode);
31700       /* Reverse lane order.  */
31701       lane = (nelems - 1 - lane);
31702       /* Reverse D register order, to match ABI.  */
31703       if (GET_MODE_SIZE (mode) == 16)
31704         lane = lane ^ (nelems / 2);
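/* Example: on big-endian V4SImode (16 bytes, 4 lanes), GCC lane 0 is
   first reversed to 3 and then XORed with nelems / 2 == 2, giving
   architectural lane 1.  */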
31709 /* Some permutations index into pairs of vectors, this is a helper function
31710    to map indexes into those pairs of vectors.  */
31713 neon_pair_endian_lane_map (machine_mode mode, int lane)
31715   int nelem = GET_MODE_NUNITS (mode);
31716   if (BYTES_BIG_ENDIAN)
31718       neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31722 /* Generate or test for an insn that supports a constant permutation.  */
31724 /* Recognize patterns for the VUZP insns.  */
31727 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31729   unsigned int i, odd, mask, nelt = d->perm.length ();
31730   rtx out0, out1, in0, in1;
31734   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31737   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31738      big endian pattern on 64 bit vectors, so we correct for that.  */
31739   swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31740               && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31742   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31744   if (first_elem == neon_endian_lane_map (d->vmode, 0))
31746   else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31750   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31752   for (i = 0; i < nelt; i++)
31755         (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31756       if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31766   if (swap_nelt != 0)
31767     std::swap (in0, in1);
31770   out1 = gen_reg_rtx (d->vmode);
31772     std::swap (out0, out1);
31774   emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
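/* Example: on little-endian V4SImode, the even VUZP output matches the
   index vector {0, 2, 4, 6} and the odd output {1, 3, 5, 7}, selecting
   alternating lanes from the concatenation of the two inputs.  */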
31778 /* Recognize patterns for the VZIP insns.  */
31781 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31783   unsigned int i, high, mask, nelt = d->perm.length ();
31784   rtx out0, out1, in0, in1;
31788   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31791   is_swapped = BYTES_BIG_ENDIAN;
31793   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31796   if (first_elem == neon_endian_lane_map (d->vmode, high))
31798   else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31802   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31804   for (i = 0; i < nelt / 2; i++)
31807         neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31808       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31812         neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31813       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31825     std::swap (in0, in1);
31828   out1 = gen_reg_rtx (d->vmode);
31830     std::swap (out0, out1);
31832   emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
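/* Example: on little-endian V4SImode, the low VZIP half matches
   {0, 4, 1, 5} and the high half {2, 6, 3, 7}, interleaving the lanes
   of the two inputs.  */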
31836 /* Recognize patterns for the VREV insns.  */
31838 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31840   unsigned int i, j, diff, nelt = d->perm.length ();
31841   rtx (*gen) (machine_mode, rtx, rtx);
31843   if (!d->one_vector_p)
31854       gen = gen_neon_vrev64;
31865       gen = gen_neon_vrev32;
31871       gen = gen_neon_vrev64;
31882       gen = gen_neon_vrev16;
31886       gen = gen_neon_vrev32;
31892       gen = gen_neon_vrev64;
31902   for (i = 0; i < nelt; i += diff + 1)
31903     for (j = 0; j <= diff; j += 1)
31905         /* This is guaranteed to be true as the value of diff
31906            is 7, 3, 1 and we should have enough elements in the
31907            queue to generate this.  Getting a vector mask with a
31908            value of diff other than these values implies that
31909            something is wrong by the time we get here.  */
31910         gcc_assert (i + j < nelt);
31911         if (d->perm[i + j] != i + diff - j)
31919   emit_insn (gen (d->vmode, d->target, d->op0));
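/* Example: VREV64 on V8QImode matches {7, 6, 5, 4, 3, 2, 1, 0}
   (diff == 7), while VREV32 on V8QImode matches {3, 2, 1, 0, 7, 6, 5, 4}
   (diff == 3): each block of diff + 1 lanes is reversed in place.  */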
31923 /* Recognize patterns for the VTRN insns.  */
31926 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31928   unsigned int i, odd, mask, nelt = d->perm.length ();
31929   rtx out0, out1, in0, in1;
31931   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31934   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
31935   if (d->perm[0] == 0)
31937   else if (d->perm[0] == 1)
31941   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31943   for (i = 0; i < nelt; i += 2)
31945       if (d->perm[i] != i + odd)
31947       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31957   if (BYTES_BIG_ENDIAN)
31959       std::swap (in0, in1);
31964   out1 = gen_reg_rtx (d->vmode);
31966     std::swap (out0, out1);
31968   emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
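/* Example: on V4SImode the even VTRN output matches {0, 4, 2, 6} and
   the odd output {1, 5, 3, 7}, transposing pairs of lanes between the
   two inputs.  */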
31972 /* Recognize patterns for the VEXT insns.  */
31975 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31977   unsigned int i, nelt = d->perm.length ();
31980   unsigned int location;
31982   unsigned int next = d->perm[0] + 1;
31984   /* TODO: Handle GCC's numbering of elements for big-endian.  */
31985   if (BYTES_BIG_ENDIAN)
31988   /* Check if the extracted indexes are increasing by one.  */
31989   for (i = 1; i < nelt; next++, i++)
31991       /* If we hit the most significant element of the 2nd vector in
31992          the previous iteration, no need to test further.  */
31993       if (next == 2 * nelt)
31996       /* If we are operating on only one vector: it could be a
31997          rotation.  If there are only two elements of size < 64, let
31998          arm_evpc_neon_vrev catch it.  */
31999       if (d->one_vector_p && (next == nelt))
32001           if ((nelt == 2) && (d->vmode != V2DImode))
32007       if (d->perm[i] != next)
32011   location = d->perm[0];
32017   offset = GEN_INT (location);
32019   if (d->vmode == E_DImode)
32022   emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
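/* Example: the V8QImode index vector {3, 4, 5, 6, 7, 8, 9, 10} matches
   VEXT with location 3: the top five lanes of op0 followed by the low
   three lanes of op1.  */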
32026 /* The NEON VTBL instruction is a fully variable permutation that's even
32027    stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
32028    is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
32029    can do slightly better by expanding this as a constant where we don't
32030    have to apply a mask.  */
32033 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32035   rtx rperm[MAX_VECT_LEN], sel;
32036   machine_mode vmode = d->vmode;
32037   unsigned int i, nelt = d->perm.length ();
32039   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
32040      numbering of elements for big-endian, we must reverse the order.  */
32041   if (BYTES_BIG_ENDIAN)
32047   /* Generic code will try constant permutation twice.  Once with the
32048      original mode and again with the elements lowered to QImode.
32049      So wait and don't do the selector expansion ourselves.  */
32050   if (vmode != V8QImode && vmode != V16QImode)
32053   for (i = 0; i < nelt; ++i)
32054     rperm[i] = GEN_INT (d->perm[i]);
32055   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32056   sel = force_reg (vmode, sel);
32058   arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32063 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32065   /* Check if the input mask matches vext before reordering the
32068   if (arm_evpc_neon_vext (d))
32071   /* The pattern matching functions above are written to look for a small
32072      number to begin the sequence (0, 1, N/2).  If we begin with an index
32073      from the second operand, we can swap the operands.  */
32074   unsigned int nelt = d->perm.length ();
32075   if (d->perm[0] >= nelt)
32077       d->perm.rotate_inputs (1);
32078       std::swap (d->op0, d->op1);
32083   if (arm_evpc_neon_vuzp (d))
32085   if (arm_evpc_neon_vzip (d))
32087   if (arm_evpc_neon_vrev (d))
32089   if (arm_evpc_neon_vtrn (d))
32091   return arm_evpc_neon_vtbl (d);
32096 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
32099 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32100                               rtx target, rtx op0, rtx op1,
32101                               const vec_perm_indices &sel)
32103   if (vmode != op_mode)
32106   struct expand_vec_perm_d d;
32107   int i, nelt, which;
32109   if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32115       rtx nop0 = force_reg (vmode, op0);
32121         op1 = force_reg (vmode, op1);
32126   gcc_assert (VECTOR_MODE_P (d.vmode));
32127   d.testing_p = !target;
32129   nelt = GET_MODE_NUNITS (d.vmode);
32130   for (i = which = 0; i < nelt; ++i)
32132       int ei = sel[i] & (2 * nelt - 1);
32133       which |= (ei < nelt ? 1 : 2);
32142       d.one_vector_p = false;
32143       if (d.testing_p || !rtx_equal_p (op0, op1))
32146       /* The elements of PERM do not suggest that only the first operand
32147          is used, but both operands are identical.  Allow easier matching
32148          of the permutation by folding the permutation into the single
32153       d.one_vector_p = true;
32158       d.one_vector_p = true;
32162   d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32165     return arm_expand_vec_perm_const_1 (&d);
32167   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32168   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32169   if (!d.one_vector_p)
32170     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32173   bool ret = arm_expand_vec_perm_const_1 (&d);
32180 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32182   /* If we are soft float and we do not have ldrd
32183      then all auto increment forms are ok.  */
32184   if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32189     /* Post increment and Pre Decrement are supported for all
32190        instruction forms except for vector forms.  */
32193       if (VECTOR_MODE_P (mode))
32195           if (code != ARM_PRE_DEC)
32205     /* Without LDRD and mode size greater than
32206        word size, there is no point in auto-incrementing
32207        because ldm and stm will not have these forms.  */
32208       if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32211     /* Vector and floating point modes do not support
32212        these auto increment forms.  */
32213       if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32226 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
32227    on ARM, since we know that shifts by negative amounts are no-ops.
32228    Additionally, the default expansion code is not available or suitable
32229    for post-reload insn splits (this can occur when the register allocator
32230    chooses not to do a shift in NEON).
32232    This function is used in both initial expand and post-reload splits, and
32233    handles all kinds of 64-bit shifts.
32235    Input requirements:
32236     - It is safe for the input and output to be the same register, but
32237       early-clobber rules apply for the shift amount and scratch registers.
32238     - Shift by register requires both scratch registers.  In all other cases
32239       the scratch registers may be NULL.
32240     - Ashiftrt by a register also clobbers the CC register.  */
32242 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32243                                rtx amount, rtx scratch1, rtx scratch2)
32245   rtx out_high = gen_highpart (SImode, out);
32246   rtx out_low = gen_lowpart (SImode, out);
32247   rtx in_high = gen_highpart (SImode, in);
32248   rtx in_low = gen_lowpart (SImode, in);
32251      in = the register pair containing the input value.
32252      out = the destination register pair.
32253      up = the high- or low-part of each pair.
32254      down = the opposite part to "up".
32255      In a shift, we can consider bits to shift from "up"-stream to
32256      "down"-stream, so in a left-shift "up" is the low-part and "down"
32257      is the high-part of each register pair.  */
32259   rtx out_up = code == ASHIFT ? out_low : out_high;
32260   rtx out_down = code == ASHIFT ? out_high : out_low;
32261   rtx in_up = code == ASHIFT ? in_low : in_high;
32262   rtx in_down = code == ASHIFT ? in_high : in_low;
32264   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32265   gcc_assert (out
32266               && (REG_P (out) || SUBREG_P (out))
32267               && GET_MODE (out) == DImode);
32268   gcc_assert (in
32269               && (REG_P (in) || SUBREG_P (in))
32270               && GET_MODE (in) == DImode);
32271   gcc_assert (amount
32272               && (((REG_P (amount) || SUBREG_P (amount))
32273                    && GET_MODE (amount) == SImode)
32274                   || CONST_INT_P (amount)));
32275   gcc_assert (scratch1 == NULL
32276               || (GET_CODE (scratch1) == SCRATCH)
32277               || (GET_MODE (scratch1) == SImode
32278                   && REG_P (scratch1)));
32279   gcc_assert (scratch2 == NULL
32280               || (GET_CODE (scratch2) == SCRATCH)
32281               || (GET_MODE (scratch2) == SImode
32282                   && REG_P (scratch2)));
32283   gcc_assert (!REG_P (out) || !REG_P (amount)
32284               || !HARD_REGISTER_P (out)
32285               || (REGNO (out) != REGNO (amount)
32286                   && REGNO (out) + 1 != REGNO (amount)));
32288   /* Macros to make following code more readable.  */
32289 #define SUB_32(DEST,SRC) \
32290         gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32291 #define RSB_32(DEST,SRC) \
32292         gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32293 #define SUB_S_32(DEST,SRC) \
32294         gen_addsi3_compare0 ((DEST), (SRC), \
32295                              GEN_INT (-32))
32296 #define SET(DEST,SRC) \
32297         gen_rtx_SET ((DEST), (SRC))
32298 #define SHIFT(CODE,SRC,AMOUNT) \
32299         gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32300 #define LSHIFT(CODE,SRC,AMOUNT) \
32301         gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32302                         SImode, (SRC), (AMOUNT))
32303 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32304         gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32305                         SImode, (SRC), (AMOUNT))
32306 #define ORR(A,B) \
32307         gen_rtx_IOR (SImode, (A), (B))
32308 #define BRANCH(COND,LABEL) \
32309         gen_arm_cond_branch ((LABEL), \
32310                              gen_rtx_ ## COND (CCmode, cc_reg, \
32314   /* Shifts by register and shifts by constant are handled separately.  */
32315   if (CONST_INT_P (amount))
32317       /* We have a shift-by-constant.  */
32319       /* First, handle out-of-range shift amounts.
32320          In both cases we try to match the result an ARM instruction in a
32321          shift-by-register would give.  This helps reduce execution
32322          differences between optimization levels, but it won't stop other
32323          parts of the compiler doing different things.  This is "undefined
32324          behavior", in any case.  */
32325       if (INTVAL (amount) <= 0)
32326         emit_insn (gen_movdi (out, in));
32327       else if (INTVAL (amount) >= 64)
32329           if (code == ASHIFTRT)
32331               rtx const31_rtx = GEN_INT (31);
32332               emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32333               emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32336             emit_insn (gen_movdi (out, const0_rtx));
32339       /* Now handle valid shifts.  */
32340       else if (INTVAL (amount) < 32)
32342           /* Shifts by a constant less than 32.  */
32343           rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32345           /* Clearing the out register in DImode first avoids lots
32346              of spilling and results in less stack usage.
32347              Later this redundant insn is completely removed.
32348              Do that only if "in" and "out" are different registers.  */
32349           if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32350             emit_insn (SET (out, const0_rtx));
32351           emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32352           emit_insn (SET (out_down,
32353                           ORR (REV_LSHIFT (code, in_up, reverse_amount),
32355           emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32359           /* Shifts by a constant greater than 31.  */
32360           rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32362           if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32363             emit_insn (SET (out, const0_rtx));
32364           emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32365           if (code == ASHIFTRT)
32366             emit_insn (gen_ashrsi3 (out_up, in_up,
32369             emit_insn (SET (out_up, const0_rtx));
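/* Worked example (illustrative): for a constant left shift of 40,
   adj_amount is 8, so the sequence amounts to out_high = in_low << 8
   and out_low = 0; for ASHIFTRT by 40 it is out_low = in_high >> 8
   (arithmetic) and out_high = in_high >> 31.  */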
32374   /* We have a shift-by-register.  */
32375   rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32377   /* This alternative requires the scratch registers.  */
32378   gcc_assert (scratch1 && REG_P (scratch1));
32379   gcc_assert (scratch2 && REG_P (scratch2));
32381   /* We will need the values "amount-32" and "32-amount" later.
32382      Swapping them around now allows the later code to be more general.  */
32386       emit_insn (SUB_32 (scratch1, amount));
32387       emit_insn (RSB_32 (scratch2, amount));
32390       emit_insn (RSB_32 (scratch1, amount));
32391       /* Also set CC = amount > 32.  */
32392       emit_insn (SUB_S_32 (scratch2, amount));
32395       emit_insn (RSB_32 (scratch1, amount));
32396       emit_insn (SUB_32 (scratch2, amount));
32399       gcc_unreachable ();
32402   /* Emit code like this:
32404      ASHIFT:
32405         out_down = in_down << amount;
32406         out_down = (in_up << (amount - 32)) | out_down;
32407         out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32408         out_up = in_up << amount;
32410      ASHIFTRT:
32411         out_down = in_down >> amount;
32412         out_down = (in_up << (32 - amount)) | out_down;
32414         out_down = ((signed)in_up >> (amount - 32)) | out_down;
32415         out_up = in_up >> amount;
32417      LSHIFTRT:
32418         out_down = in_down >> amount;
32419         out_down = (in_up << (32 - amount)) | out_down;
32421         out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32422         out_up = in_up >> amount;
32424      The ARM and Thumb2 variants are the same but implemented slightly
32425      differently.  If this were only called during expand we could just
32426      use the Thumb2 case and let combine do the right thing, but this
32427      can also be called from post-reload splitters.  */
32429   emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32431   if (!TARGET_THUMB2)
32433       /* Emit code for ARM mode.  */
32434       emit_insn (SET (out_down,
32435                       ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32436       if (code == ASHIFTRT)
32438           rtx_code_label *done_label = gen_label_rtx ();
32439           emit_jump_insn (BRANCH (LT, done_label));
32440           emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32442           emit_label (done_label);
32445         emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32450       /* Emit code for Thumb2 mode.
32451          Thumb2 can't do shift and or in one insn.  */
32452       emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32453       emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32455       if (code == ASHIFTRT)
32457           rtx_code_label *done_label = gen_label_rtx ();
32458           emit_jump_insn (BRANCH (LT, done_label));
32459           emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32460           emit_insn (SET (out_down, ORR (out_down, scratch2)));
32461           emit_label (done_label);
32465           emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32466           emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32470   emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32484 /* Returns true if the pattern is a valid symbolic address, which is either a
32485    symbol_ref or (symbol_ref + addend).
32487    According to the ARM ELF ABI, the initial addend of REL-type relocations
32488    processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32489    literal field of the instruction as a 16-bit signed value in the range
32490    -32768 <= A < 32768.
32492    In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32493    unsigned range of 0 <= A < 256 as described in the AAELF32
32494    relocation handling documentation: REL-type relocations are encoded
32495    as unsigned in this case.  */
32498 arm_valid_symbolic_address_p (rtx addr)
32500   rtx xop0, xop1 = NULL_RTX;
32503   if (target_word_relocations)
32506   if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32509   /* (const (plus: symbol_ref const_int)) */
32510   if (GET_CODE (addr) == CONST)
32511     tmp = XEXP (addr, 0);
32513   if (GET_CODE (tmp) == PLUS)
32515       xop0 = XEXP (tmp, 0);
32516       xop1 = XEXP (tmp, 1);
32518       if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32520           if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32521             return IN_RANGE (INTVAL (xop1), 0, 0xff);
32523             return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
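/* Example: (const (plus (symbol_ref "sym") (const_int 42))) is valid
   everywhere, while an addend of -4 is rejected on Thumb-1 without
   MOVT, where only the unsigned range [0, 255] is representable.  */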
32530 /* Return true if COMPARISON is a valid comparison operation, putting
32531    the operands into a form that is valid.  */
32533 arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
32535   enum rtx_code code = GET_CODE (*comparison);
32537   machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32538                       ? GET_MODE (*op2) : GET_MODE (*op1);
32540   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32542   if (code == UNEQ || code == LTGT)
32545   code_int = (int) code;
32546   arm_canonicalize_comparison (&code_int, op1, op2, 0);
32547   PUT_CODE (*comparison, (enum rtx_code) code_int);
32552       if (!arm_add_operand (*op1, mode))
32553         *op1 = force_reg (mode, *op1);
32554       if (!arm_add_operand (*op2, mode))
32555         *op2 = force_reg (mode, *op2);
32559       /* gen_compare_reg() will sort out any invalid operands.  */
32563       if (!TARGET_VFP_FP16INST)
32565       /* FP16 comparisons are done in SF mode.  */
32567       *op1 = convert_to_mode (mode, *op1, 1);
32568       *op2 = convert_to_mode (mode, *op2, 1);
32569       /* Fall through.  */
32572       if (!vfp_compare_operand (*op1, mode))
32573         *op1 = force_reg (mode, *op1);
32574       if (!vfp_compare_operand (*op2, mode))
32575         *op2 = force_reg (mode, *op2);
32585 /* Maximum number of instructions to set block of memory.  */
32587 arm_block_set_max_insns (void)
32589   if (optimize_function_for_size_p (cfun))
32592   return current_tune->max_insns_inline_memset;
32595 /* Return TRUE if it's profitable to set block of memory for
32596    non-vectorized case.  VAL is the value to set the memory
32597    with.  LENGTH is the number of bytes to set.  ALIGN is the
32598    alignment of the destination memory in bytes.  UNALIGNED_P
32599    is TRUE if we can only set the memory with instructions
32600    meeting alignment requirements.  USE_STRD_P is TRUE if we
32601    can use strd to set the memory.  */
32603 arm_block_set_non_vect_profit_p (rtx val,
32604                                  unsigned HOST_WIDE_INT length,
32605                                  unsigned HOST_WIDE_INT align,
32606                                  bool unaligned_p, bool use_strd_p)
32609   /* For leftovers in bytes of 0-7, we can set the memory block using
32610      strb/strh/str with minimum instruction number.  */
32611   const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32615       num = arm_const_inline_cost (SET, val);
32616       num += length / align + length % align;
32618   else if (use_strd_p)
32620       num = arm_const_double_inline_cost (val);
32621       num += (length >> 3) + leftover[length & 7];
32625       num = arm_const_inline_cost (SET, val);
32626       num += (length >> 2) + leftover[length & 3];
32629   /* We may be able to combine last pair STRH/STRB into a single STR
32630      by shifting one byte back.  */
32631   if (unaligned_access && length > 3 && (length & 3) == 3)
32634   return (num <= arm_block_set_max_insns ());
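/* Example of the cost arithmetic (illustrative): a 15-byte memset with
   USE_STRD_P needs (15 >> 3) + leftover[15 & 7] = 1 + 3 stores (one
   STRD, then STR + STRH + STRB), plus the cost of forming the constant
   in registers.  */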
32637 /* Return TRUE if it's profitable to set block of memory for
32638    vectorized case.  LENGTH is the number of bytes to set.
32639    ALIGN is the alignment of destination memory in bytes.
32640    MODE is the vector mode used to set the memory.  */
32642 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32643                              unsigned HOST_WIDE_INT align,
32647   bool unaligned_p = ((align & 3) != 0);
32648   unsigned int nelt = GET_MODE_NUNITS (mode);
32650   /* Instruction loading constant value.  */
32652   /* Instructions storing the memory.  */
32653   num += (length + nelt - 1) / nelt;
32654   /* Instructions adjusting the address expression.  Only need to
32655      adjust address expression if it's 4 bytes aligned and bytes
32656      leftover can only be stored by mis-aligned store instruction.  */
32657   if (!unaligned_p && (length & 3) != 0)
32660   /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
32661   if (!unaligned_p && mode == V16QImode)
32664   return (num <= arm_block_set_max_insns ());
32667 /* Set a block of memory using vectorization instructions for the
32668    unaligned case.  We fill the first LENGTH bytes of the memory
32669    area starting from DSTBASE with byte constant VALUE.  ALIGN is
32670    the alignment requirement of memory.  Return TRUE if succeeded.  */
32672 arm_block_set_unaligned_vect (rtx dstbase,
32673                               unsigned HOST_WIDE_INT length,
32674                               unsigned HOST_WIDE_INT value,
32675                               unsigned HOST_WIDE_INT align)
32677   unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32680   rtx (*gen_func) (rtx, rtx);
32682   unsigned HOST_WIDE_INT v = value;
32683   unsigned int offset = 0;
32684   gcc_assert ((align & 0x3) != 0);
32685   nelt_v8 = GET_MODE_NUNITS (V8QImode);
32686   nelt_v16 = GET_MODE_NUNITS (V16QImode);
32687   if (length >= nelt_v16)
32690       gen_func = gen_movmisalignv16qi;
32695       gen_func = gen_movmisalignv8qi;
32697   nelt_mode = GET_MODE_NUNITS (mode);
32698   gcc_assert (length >= nelt_mode);
32699   /* Skip if it isn't profitable.  */
32700   if (!arm_block_set_vect_profit_p (length, align, mode))
32703   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32704   mem = adjust_automodify_address (dstbase, mode, dst, offset);
32706   v = sext_hwi (v, BITS_PER_WORD);
32708   reg = gen_reg_rtx (mode);
32709   val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32710   /* Emit instruction loading the constant value.  */
32711   emit_move_insn (reg, val_vec);
32713   /* Handle nelt_mode bytes in a vector.  */
32714   for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32716       emit_insn ((*gen_func) (mem, reg));
32717       if (i + 2 * nelt_mode <= length)
32719           emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32720           offset += nelt_mode;
32721           mem = adjust_automodify_address (dstbase, mode, dst, offset);
32725   /* If there are not less than nelt_v8 bytes leftover, we must be in
32727   gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32729   /* Handle (8, 16) bytes leftover.  */
32730   if (i + nelt_v8 < length)
32732       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32733       offset += length - i;
32734       mem = adjust_automodify_address (dstbase, mode, dst, offset);
32736       /* We are shifting bytes back, set the alignment accordingly.  */
32737       if ((length & 1) != 0 && align >= 2)
32738         set_mem_align (mem, BITS_PER_UNIT);
32740       emit_insn (gen_movmisalignv16qi (mem, reg));
32742   /* Handle (0, 8] bytes leftover.  */
32743   else if (i < length && i + nelt_v8 >= length)
32745       if (mode == V16QImode)
32746         reg = gen_lowpart (V8QImode, reg);
32748       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32749                                               + (nelt_mode - nelt_v8))));
32750       offset += (length - i) + (nelt_mode - nelt_v8);
32751       mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32753       /* We are shifting bytes back, set the alignment accordingly.  */
32754       if ((length & 1) != 0 && align >= 2)
32755         set_mem_align (mem, BITS_PER_UNIT);
32757       emit_insn (gen_movmisalignv8qi (mem, reg));
32763 /* Set a block of memory using vectorization instructions for the
32764    aligned case.  We fill the first LENGTH bytes of the memory area
32765    starting from DSTBASE with byte constant VALUE.  ALIGN is the
32766    alignment requirement of memory.  Return TRUE if succeeded.  */
32768 arm_block_set_aligned_vect (rtx dstbase,
32769                             unsigned HOST_WIDE_INT length,
32770                             unsigned HOST_WIDE_INT value,
32771                             unsigned HOST_WIDE_INT align)
32773   unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32774   rtx dst, addr, mem;
32777   unsigned int offset = 0;
32779   gcc_assert ((align & 0x3) == 0);
32780   nelt_v8 = GET_MODE_NUNITS (V8QImode);
32781   nelt_v16 = GET_MODE_NUNITS (V16QImode);
32782   if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32787   nelt_mode = GET_MODE_NUNITS (mode);
32788   gcc_assert (length >= nelt_mode);
32789   /* Skip if it isn't profitable.  */
32790   if (!arm_block_set_vect_profit_p (length, align, mode))
32793   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32795   reg = gen_reg_rtx (mode);
32796   val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32797   /* Emit instruction loading the constant value.  */
32798   emit_move_insn (reg, val_vec);
32801   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
32802   if (mode == V16QImode)
32804       mem = adjust_automodify_address (dstbase, mode, dst, offset);
32805       emit_insn (gen_movmisalignv16qi (mem, reg));
32807       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
32808       if (i + nelt_v8 < length && i + nelt_v16 > length)
32810           emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32811           offset += length - nelt_mode;
32812           mem = adjust_automodify_address (dstbase, mode, dst, offset);
32813           /* We are shifting bytes back, set the alignment accordingly.  */
32814           if ((length & 0x3) == 0)
32815             set_mem_align (mem, BITS_PER_UNIT * 4);
32816           else if ((length & 0x1) == 0)
32817             set_mem_align (mem, BITS_PER_UNIT * 2);
32819             set_mem_align (mem, BITS_PER_UNIT);
32821           emit_insn (gen_movmisalignv16qi (mem, reg));
32824       /* Fall through for bytes leftover.  */
32826       nelt_mode = GET_MODE_NUNITS (mode);
32827       reg = gen_lowpart (V8QImode, reg);
32830   /* Handle 8 bytes in a vector.  */
32831   for (; (i + nelt_mode <= length); i += nelt_mode)
32833       addr = plus_constant (Pmode, dst, i);
32834       mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32835       if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32836         emit_move_insn (mem, reg);
32838         emit_insn (gen_unaligned_storev8qi (mem, reg));
32841   /* Handle single word leftover by shifting 4 bytes back.  We can
32842      use aligned access for this case.  */
32843   if (i + UNITS_PER_WORD == length)
32845       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32846       offset += i - UNITS_PER_WORD;
32847       mem = adjust_automodify_address (dstbase, mode, addr, offset);
32848       /* We are shifting 4 bytes back, set the alignment accordingly.  */
32849       if (align > UNITS_PER_WORD)
32850         set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32852       emit_insn (gen_unaligned_storev8qi (mem, reg));
32854   /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32855      We have to use unaligned access for this case.  */
32856   else if (i < length)
32858       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32859       offset += length - nelt_mode;
32860       mem = adjust_automodify_address (dstbase, mode, dst, offset);
32861       /* We are shifting bytes back, set the alignment accordingly.  */
32862       if ((length & 1) == 0)
32863         set_mem_align (mem, BITS_PER_UNIT * 2);
32865         set_mem_align (mem, BITS_PER_UNIT);
32867       emit_insn (gen_movmisalignv8qi (mem, reg));
32873 /* Set a block of memory using plain strh/strb instructions, only
32874    using instructions allowed by ALIGN on processor.  We fill the
32875    first LENGTH bytes of the memory area starting from DSTBASE
32876    with byte constant VALUE.  ALIGN is the alignment requirement
32879 arm_block_set_unaligned_non_vect (rtx dstbase,
32880                                   unsigned HOST_WIDE_INT length,
32881                                   unsigned HOST_WIDE_INT value,
32882                                   unsigned HOST_WIDE_INT align)
32885   rtx dst, addr, mem;
32886   rtx val_exp, val_reg, reg;
32888   HOST_WIDE_INT v = value;
32890   gcc_assert (align == 1 || align == 2);
32893     v |= (value << BITS_PER_UNIT);
32895   v = sext_hwi (v, BITS_PER_WORD);
32896   val_exp = GEN_INT (v);
32897   /* Skip if it isn't profitable.  */
32898   if (!arm_block_set_non_vect_profit_p (val_exp, length,
32899                                         align, true, false))
32902   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32903   mode = (align == 2 ? HImode : QImode);
32904   val_reg = force_reg (SImode, val_exp);
32905   reg = gen_lowpart (mode, val_reg);
32907   for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32909       addr = plus_constant (Pmode, dst, i);
32910       mem = adjust_automodify_address (dstbase, mode, addr, i);
32911       emit_move_insn (mem, reg);
32914   /* Handle single byte leftover.  */
32915   if (i + 1 == length)
32917       reg = gen_lowpart (QImode, val_reg);
32918       addr = plus_constant (Pmode, dst, i);
32919       mem = adjust_automodify_address (dstbase, QImode, addr, i);
32920       emit_move_insn (mem, reg);
32924   gcc_assert (i == length);
32928 /* Set a block of memory using plain strd/str/strh/strb instructions,
32929    to permit unaligned copies on processors which support unaligned
32930    semantics for those instructions.  We fill the first LENGTH bytes
32931    of the memory area starting from DSTBASE with byte constant VALUE.
32932    ALIGN is the alignment requirement of memory.  */
32934 arm_block_set_aligned_non_vect (rtx dstbase,
32935                                 unsigned HOST_WIDE_INT length,
32936                                 unsigned HOST_WIDE_INT value,
32937                                 unsigned HOST_WIDE_INT align)
32940   rtx dst, addr, mem;
32941   rtx val_exp, val_reg, reg;
32942   unsigned HOST_WIDE_INT v;
32945   use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32946                 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32948   v = (value | (value << 8) | (value << 16) | (value << 24));
32949   if (length < UNITS_PER_WORD)
32950     v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32953     v |= (v << BITS_PER_WORD);
32955     v = sext_hwi (v, BITS_PER_WORD);
32957   val_exp = GEN_INT (v);
32958   /* Skip if it isn't profitable.  */
32959   if (!arm_block_set_non_vect_profit_p (val_exp, length,
32960                                         align, false, use_strd_p))
32965       /* Try without strd.  */
32966       v = (v >> BITS_PER_WORD);
32967       v = sext_hwi (v, BITS_PER_WORD);
32968       val_exp = GEN_INT (v);
32969       use_strd_p = false;
32970       if (!arm_block_set_non_vect_profit_p (val_exp, length,
32971                                             align, false, use_strd_p))
32976   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32977   /* Handle double words using strd if possible.  */
32980       val_reg = force_reg (DImode, val_exp);
32982       for (; (i + 8 <= length); i += 8)
32984           addr = plus_constant (Pmode, dst, i);
32985           mem = adjust_automodify_address (dstbase, DImode, addr, i);
32986           if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32987             emit_move_insn (mem, reg);
32989             emit_insn (gen_unaligned_storedi (mem, reg));
32993     val_reg = force_reg (SImode, val_exp);
32995   /* Handle words.  */
32996   reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32997   for (; (i + 4 <= length); i += 4)
32999       addr = plus_constant (Pmode, dst, i);
33000       mem = adjust_automodify_address (dstbase, SImode, addr, i);
33001       if ((align & 3) == 0)
33002         emit_move_insn (mem, reg);
33004         emit_insn (gen_unaligned_storesi (mem, reg));
33007   /* Merge last pair of STRH and STRB into a STR if possible.  */
33008   if (unaligned_access && i > 0 && (i + 3) == length)
33010       addr = plus_constant (Pmode, dst, i - 1);
33011       mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33012       /* We are shifting one byte back, set the alignment accordingly.  */
33013       if ((align & 1) == 0)
33014         set_mem_align (mem, BITS_PER_UNIT);
33016       /* Most likely this is an unaligned access, and we can't tell at
33017          compilation time.  */
33018       emit_insn (gen_unaligned_storesi (mem, reg));
33022   /* Handle half word leftover.  */
33023   if (i + 2 <= length)
33025       reg = gen_lowpart (HImode, val_reg);
33026       addr = plus_constant (Pmode, dst, i);
33027       mem = adjust_automodify_address (dstbase, HImode, addr, i);
33028       if ((align & 1) == 0)
33029         emit_move_insn (mem, reg);
33031         emit_insn (gen_unaligned_storehi (mem, reg));
33036   /* Handle single byte leftover.  */
33037   if (i + 1 == length)
33039       reg = gen_lowpart (QImode, val_reg);
33040       addr = plus_constant (Pmode, dst, i);
33041       mem = adjust_automodify_address (dstbase, QImode, addr, i);
33042       emit_move_insn (mem, reg);
33048 /* Set a block of memory using vectorization instructions for both
33049    aligned and unaligned cases.  We fill the first LENGTH bytes of
33050    the memory area starting from DSTBASE with byte constant VALUE.
33051    ALIGN is the alignment requirement of memory.  */
33053 arm_block_set_vect (rtx dstbase,
33054                     unsigned HOST_WIDE_INT length,
33055                     unsigned HOST_WIDE_INT value,
33056                     unsigned HOST_WIDE_INT align)
33058   /* Check whether we need to use unaligned store instruction.  */
33059   if (((align & 3) != 0 || (length & 3) != 0)
33060       /* Check whether unaligned store instruction is available.  */
33061       && (!unaligned_access || BYTES_BIG_ENDIAN))
33064   if ((align & 3) == 0)
33065     return arm_block_set_aligned_vect (dstbase, length, value, align);
33067     return arm_block_set_unaligned_vect (dstbase, length, value, align);
33070 /* Expand string store operation.  Firstly we try to do that by using
33071    vectorization instructions, then try with ARM unaligned access and
33072    double-word store if profitable.  OPERANDS[0] is the destination,
33073    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33074    initialize the memory, OPERANDS[3] is the known alignment of the
33077 arm_gen_setmem (rtx *operands)
33079   rtx dstbase = operands[0];
33080   unsigned HOST_WIDE_INT length;
33081   unsigned HOST_WIDE_INT value;
33082   unsigned HOST_WIDE_INT align;
33084   if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33087   length = UINTVAL (operands[1]);
33091   value = (UINTVAL (operands[2]) & 0xFF);
33092   align = UINTVAL (operands[3]);
33093   if (TARGET_NEON && length >= 8
33094       && current_tune->string_ops_prefer_neon
33095       && arm_block_set_vect (dstbase, length, value, align))
33098   if (!unaligned_access && (align & 3) != 0)
33099     return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33101   return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33106 arm_macro_fusion_p (void)
33108   return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33111 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33112    for MOVW / MOVT macro fusion.  */
33115 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33117   /* We are trying to fuse
33118        movw imm / movt imm
33119      instructions as a group that gets scheduled together.  */
33121   rtx set_dest = SET_DEST (curr_set);
33123   if (GET_MODE (set_dest) != SImode)
33126   /* We are trying to match:
33127        prev (movw)  == (set (reg r0) (const_int imm16))
33128        curr (movt)  == (set (zero_extract (reg r0)
33131                             (const_int imm16_1))
33133        prev (movw)  == (set (reg r1)
33134                             (high (symbol_ref ("SYM"))))
33135        curr (movt)  == (set (reg r0)
33137                             (symbol_ref ("SYM"))))  */
33139   if (GET_CODE (set_dest) == ZERO_EXTRACT)
33141       if (CONST_INT_P (SET_SRC (curr_set))
33142           && CONST_INT_P (SET_SRC (prev_set))
33143           && REG_P (XEXP (set_dest, 0))
33144           && REG_P (SET_DEST (prev_set))
33145           && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33149   else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33150            && REG_P (SET_DEST (curr_set))
33151            && REG_P (SET_DEST (prev_set))
33152            && GET_CODE (SET_SRC (prev_set)) == HIGH
33153            && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
* prev
, rtx_insn
* curr
)
33162 rtx prev_set
= single_set (prev
);
33163 rtx curr_set
= single_set (curr
);
33169 if (any_condjump_p (curr
))
33172 if (!arm_macro_fusion_p ())
33175 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
33176 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
33182 /* Return true iff the instruction fusion described by OP is enabled. */
33184 arm_fusion_enabled_p (tune_params::fuse_ops op
)
33186 return current_tune
->fusible_ops
& op
;
33189 /* Return TRUE if return address signing mechanism is enabled.  */
33191 arm_current_function_pac_enabled_p (void)
33193   return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33194           || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33195               && !crtl->is_leaf));
33198 /* Raise an error if the current target arch is not bti compatible.  */
33199 void aarch_bti_arch_check (void)
33201   if (!arm_arch8m_main)
33202     error ("This architecture does not support branch protection instructions");
33205 /* Return TRUE if Branch Target Identification Mechanism is enabled.  */
33207 aarch_bti_enabled (void)
33209   return aarch_enable_bti != 0;
33212 /* Check if INSN is a BTI J insn.  */
33214 aarch_bti_j_insn_p (rtx_insn *insn)
33216   if (!insn || !INSN_P (insn))
33219   rtx pat = PATTERN (insn);
33220   return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33223 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction.  */
33225 aarch_pac_insn_p (rtx x)
33227   if (!x || !INSN_P (x))
33230   rtx pat = PATTERN (x);
33232   if (GET_CODE (pat) == SET)
33234       rtx tmp = XEXP (pat, 1);
33236           && ((GET_CODE (tmp) == UNSPEC
33237                && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33238               || (GET_CODE (tmp) == UNSPEC_VOLATILE
33239                   && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33246 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33247    For Arm, both of these map to a simple BTI instruction.  */
33250 aarch_gen_bti_c (void)
33252   return gen_bti_nop ();
33256 aarch_gen_bti_j (void)
33258   return gen_bti_nop ();
33261 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
33262    scheduled for speculative execution.  Reject the long-running division
33263    and square-root instructions.  */
33266 arm_sched_can_speculate_insn (rtx_insn *insn)
33268   switch (get_attr_type (insn))
33276     case TYPE_NEON_FP_SQRT_S:
33277     case TYPE_NEON_FP_SQRT_D:
33278     case TYPE_NEON_FP_SQRT_S_Q:
33279     case TYPE_NEON_FP_SQRT_D_Q:
33280     case TYPE_NEON_FP_DIV_S:
33281     case TYPE_NEON_FP_DIV_D:
33282     case TYPE_NEON_FP_DIV_S_Q:
33283     case TYPE_NEON_FP_DIV_D_Q:
33290 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
33292 static unsigned HOST_WIDE_INT
33293 arm_asan_shadow_offset (void)
33295   return HOST_WIDE_INT_1U << 29;
33299 /* This is a temporary fix for PR60655.  Ideally we need
33300    to handle most of these cases in the generic part but
33301    currently we reject minus (..) (sym_ref).  We try to
33302    ameliorate the case with minus (sym_ref1) (sym_ref2)
33303    where they are in the same section.  */
33306 arm_const_not_ok_for_debug_p (rtx p)
33308   tree decl_op0 = NULL;
33309   tree decl_op1 = NULL;
33311   if (GET_CODE (p) == UNSPEC)
33313   if (GET_CODE (p) == MINUS)
33315       if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33317           decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33319               && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33320               && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33322               if ((VAR_P (decl_op1)
33323                    || TREE_CODE (decl_op1) == CONST_DECL)
33324                   && (VAR_P (decl_op0)
33325                       || TREE_CODE (decl_op0) == CONST_DECL))
33326                 return (get_variable_section (decl_op1, false)
33327                         != get_variable_section (decl_op0, false));
33329               if (TREE_CODE (decl_op1) == LABEL_DECL
33330                   && TREE_CODE (decl_op0) == LABEL_DECL)
33331                 return (DECL_CONTEXT (decl_op1)
33332                         != DECL_CONTEXT (decl_op0));
33342 /* Return TRUE if X is a reference to a value in a constant pool.  */
33344 arm_is_constant_pool_ref (rtx x)
33347           && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33348           && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33351 /* Remember the last target of arm_set_current_function.  */
33352 static GTY(()) tree arm_previous_fndecl;
33354 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */
33357 save_restore_target_globals (tree new_tree)
33359   /* If we have a previous state, use it.  */
33360   if (TREE_TARGET_GLOBALS (new_tree))
33361     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33362   else if (new_tree == target_option_default_node)
33363     restore_target_globals (&default_target_globals);
33366       /* Call target_reinit and save the state for TARGET_GLOBALS.  */
33367       TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33370   arm_option_params_internal ();
33373 /* Invalidate arm_previous_fndecl.  */
33376 arm_reset_previous_fndecl (void)
33378   arm_previous_fndecl = NULL_TREE;
33381 /* Establish appropriate back-end context for processing the function
33382    FNDECL.  The argument might be NULL to indicate processing at top
33383    level, outside of any function scope.  */
33386 arm_set_current_function (tree fndecl)
33388   if (!fndecl || fndecl == arm_previous_fndecl)
33391   tree old_tree = (arm_previous_fndecl
33392                    ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33395   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33397   /* If current function has no attributes but previous one did,
33398      use the default node.  */
33399   if (!new_tree && old_tree)
33400     new_tree = target_option_default_node;
33402   /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
33403      the default have been handled by save_restore_target_globals from
33404      arm_pragma_target_parse.  */
33405   if (old_tree == new_tree)
33408   arm_previous_fndecl = fndecl;
33410   /* First set the target options.  */
33411   cl_target_option_restore (&global_options, &global_options_set,
33412                             TREE_TARGET_OPTION (new_tree));
33414   save_restore_target_globals (new_tree);
33416   arm_override_options_after_change_1 (&global_options, &global_options_set);
33419 /* Implement TARGET_OPTION_PRINT.  */
33422 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33424   int flags = ptr->x_target_flags;
33425   const char *fpu_name;
33427   fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33428               ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33430   fprintf (file, "%*sselected isa %s\n", indent, "",
33431            TARGET_THUMB2_P (flags) ? "thumb2" :
33432            TARGET_THUMB_P (flags) ? "thumb1" :
33435   if (ptr->x_arm_arch_string)
33436     fprintf (file, "%*sselected architecture %s\n", indent, "",
33437              ptr->x_arm_arch_string);
33439   if (ptr->x_arm_cpu_string)
33440     fprintf (file, "%*sselected CPU %s\n", indent, "",
33441              ptr->x_arm_cpu_string);
33443   if (ptr->x_arm_tune_string)
33444     fprintf (file, "%*sselected tune %s\n", indent, "",
33445              ptr->x_arm_tune_string);
33447   fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33450 /* Hook to determine if one function can safely inline another.  */
33453 arm_can_inline_p (tree caller, tree callee)
33455   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33456   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33457   bool can_inline = true;
33459   struct cl_target_option *caller_opts
33460     = TREE_TARGET_OPTION (caller_tree ? caller_tree
33461                           : target_option_default_node);
33463   struct cl_target_option *callee_opts
33464     = TREE_TARGET_OPTION (callee_tree ? callee_tree
33465                           : target_option_default_node);
33467   if (callee_opts == caller_opts)
33470   /* Callee's ISA features should be a subset of the caller's.  */
33471   struct arm_build_target caller_target;
33472   struct arm_build_target callee_target;
33473   caller_target.isa = sbitmap_alloc (isa_num_bits);
33474   callee_target.isa = sbitmap_alloc (isa_num_bits);
33476   arm_configure_build_target (&caller_target, caller_opts, false);
33477   arm_configure_build_target (&callee_target, callee_opts, false);
33478   if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33479     can_inline = false;
33481   sbitmap_free (caller_target.isa);
33482   sbitmap_free (callee_target.isa);
33484   /* OK to inline between different modes.
33485      Functions with mode-specific instructions, e.g. using asm,
33486      must be explicitly protected with noinline.  */
33490 /* Hook to fix a function's alignment affected by the target attribute.  */
33493 arm_relayout_function (tree fndecl)
33495   if (DECL_USER_ALIGN (fndecl))
33498   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33501     callee_tree = target_option_default_node;
33503   struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33506     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33509 /* Inner function to process the attribute((target(...))), take an argument and
33510    set the current options from the argument.  If we have a list, recursively
33511    go over the list.  */
33514 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33516   if (TREE_CODE (args) == TREE_LIST)
33520       for (; args; args = TREE_CHAIN (args))
33521         if (TREE_VALUE (args)
33522             && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33527   else if (TREE_CODE (args) != STRING_CST)
33529       error ("attribute %<target%> argument not a string");
33533   char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33536   while ((q = strtok (argstr, ",")) != NULL)
33539       if (!strcmp (q, "thumb"))
33541           opts->x_target_flags |= MASK_THUMB;
33542           if (TARGET_FDPIC && !arm_arch_thumb2)
33543             sorry ("FDPIC mode is not supported in Thumb-1 mode");
33546       else if (!strcmp (q, "arm"))
33547         opts->x_target_flags &= ~MASK_THUMB;
33549       else if (!strcmp (q, "general-regs-only"))
33550         opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33552       else if (startswith (q, "fpu="))
33555           if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33556                                        &fpu_index, CL_TARGET))
33558               error ("invalid fpu for target attribute or pragma %qs", q);
33561           if (fpu_index == TARGET_FPU_auto)
33563               /* This doesn't really make sense until we support
33564                  general dynamic selection of the architecture and all
33566               sorry ("auto fpu selection not currently permitted here");
33569           opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33571       else if (startswith (q, "arch="))
33573           char *arch = q + 5;
33574           const arch_option *arm_selected_arch
33575             = arm_parse_arch_option_name (all_architectures, "arch", arch);
33577           if (!arm_selected_arch)
33579               error ("invalid architecture for target attribute or pragma %qs",
33584           opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33586       else if (q[0] == '+')
33588           opts->x_arm_arch_string
33589             = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33593         error ("unknown target attribute or pragma %qs", q);
33604 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
33605 struct gcc_options
*opts_set
)
33607 struct cl_target_option cl_opts
;
33609 if (!arm_valid_target_attribute_rec (args
, opts
))
33612 cl_target_option_save (&cl_opts
, opts
, opts_set
);
33613 arm_configure_build_target (&arm_active_target
, &cl_opts
, false);
33614 arm_option_check_internal (opts
);
33615 /* Do any overrides, such as global options arch=xxx.
33616 We do this since arm_active_target was overridden. */
33617 arm_option_reconfigure_globals ();
33618 arm_options_perform_arch_sanity_checks ();
33619 arm_option_override_internal (opts
, opts_set
);
33621 return build_target_option_node (opts
, opts_set
);
33625 add_attribute (const char *mode, tree *attributes)
33627   size_t len = strlen (mode);
33628   tree value = build_string (len, mode);
33630   TREE_TYPE (value) = build_array_type (char_type_node,
33631                                         build_index_type (size_int (len)));
33633   *attributes = tree_cons (get_identifier ("target"),
33634                            build_tree_list (NULL_TREE, value),
33638 /* For testing.  Insert thumb or arm modes alternately on functions.  */
33641 arm_insert_attributes (tree fndecl, tree *attributes)
33645   if (! TARGET_FLIP_THUMB)
33648   if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
33649       || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33652   /* Nested definitions must inherit mode.  */
33653   if (current_function_decl)
33655       mode = TARGET_THUMB ? "thumb" : "arm";
33656       add_attribute (mode, attributes);
33660   /* If there is already a setting don't change it.  */
33661   if (lookup_attribute ("target", *attributes) != NULL)
33664   mode = thumb_flipper ? "thumb" : "arm";
33665   add_attribute (mode, attributes);
33667   thumb_flipper = !thumb_flipper;
33670 /* Hook to validate attribute((target("string"))).  */
33673 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33674                               tree args, int ARG_UNUSED (flags))
33677   struct gcc_options func_options, func_options_set;
33678   tree cur_tree, new_optimize;
33679   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33681   /* Get the optimization options of the current function.  */
33682   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33684   /* If the function changed the optimization levels as well as setting target
33685      options, start with the optimizations specified.  */
33686   if (!func_optimize)
33687     func_optimize = optimization_default_node;
33689   /* Init func_options.  */
33690   memset (&func_options, 0, sizeof (func_options));
33691   init_options_struct (&func_options, NULL);
33692   lang_hooks.init_options_struct (&func_options);
33693   memset (&func_options_set, 0, sizeof (func_options_set));
33695   /* Initialize func_options to the defaults.  */
33696   cl_optimization_restore (&func_options, &func_options_set,
33697                            TREE_OPTIMIZATION (func_optimize));
33699   cl_target_option_restore (&func_options, &func_options_set,
33700                             TREE_TARGET_OPTION (target_option_default_node));
33702   /* Set func_options flags with new target mode.  */
33703   cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33704                                               &func_options_set);
33706   if (cur_tree == NULL_TREE)
33709   new_optimize = build_optimization_node (&func_options, &func_options_set);
33711   DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33713   DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits_internal);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
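/* For example (illustrative): an ISA bitmap whose FPU-related bits
   exactly match those of "vfpv3-d16" yields that canonical name even
   if the user spelt an alias, because the first exact match in
   all_fpus wins; a bitmap with no FPU bits at all is reported as
   soft-float.  */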
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  arm_print_asm_arch_directives (stream, targ_options);

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
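/* For example (illustrative): given
     (mem:SI (plus:SI (reg:SI r1) (const_int 8)))
   this sets *BASE to (reg:SI r1) and *OFFSET to (const_int 8); a bare
   (mem:SI (reg:SI r1)) yields *OFFSET = (const_int 0).  Anything else,
   such as a symbol_ref address, clears both and returns false.  */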
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (REG_P (src) && MEM_P (dest))
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (MEM_P (src) && REG_P (dest))
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
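/* A worked example (illustrative): with MAX_PRI == 32766, TMP starts
   at 32765; a load through [r2, #4] gets *FUSION_PRI = 32764 and a
   store through the same address gets 32763, so loads sort ahead of
   stores, and the base-register and offset adjustments to *PRI then
   pull accesses off the same base register next to each other.  */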
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target must be described using
   a mask selecting GCC high-lanes.

		   Big-Endian		  Little-Endian

   GCC		  0   1   2   3		  3   2   1   0
		| x | x | x | x |	| x | x | x | x |
   Architecture	  3   2   1   0		  3   2   1   0

   Low Mask:	    { 2, 3 }		    { 0, 1 }
   High Mask:	    { 0, 1 }		    { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
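/* For example (illustrative): for V4SImode with HIGH == true this
   returns (parallel [(const_int 2) (const_int 3)]) on a little-endian
   target but (parallel [(const_int 0) (const_int 1)]) on big-endian,
   matching the mask table in the comment above.  */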
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;		/* SHF_ARM_PURECODE.  */

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;			/* SHF_ALLOC.  */
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;		/* SHF_EXCLUDE.  */
      if (flags & SECTION_WRITE)
	*num |= 0x1;			/* SHF_WRITE.  */
      if (flags & SECTION_CODE)
	*num |= 0x4;			/* SHF_EXECINSTR.  */
      if (flags & SECTION_MERGE)
	*num |= 0x10;			/* SHF_MERGE.  */
      if (flags & SECTION_STRINGS)
	*num |= 0x20;			/* SHF_STRINGS.  */
      if (flags & SECTION_TLS)
	*num |= 0x400;			/* SHF_TLS.  */
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;			/* SHF_GROUP.  */

      return true;
    }

  return false;
}
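/* For example (illustrative): an allocatable, executable pure-code
   section ends up with 0x20000000 | 0x2 | 0x4 == 0x20000006, emitted
   as a numeric flags field because SHF_ARM_PURECODE has no letter
   code in the assembler's section-flag syntax.  */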
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
						       exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implement the TARGET_SECTION_TYPE_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode, op0, mode, op1, mode);

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
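/* For example (illustrative): for SImode operands LIBFUNC is
   __aeabi_idivmod (or __aeabi_uidivmod), LIBVAL_MODE becomes DImode,
   and the two subregs split the combined return value at byte offsets
   0 and 4 into the quotient and the remainder respectively.  */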
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
    case VUNSPEC_CDP:
    case VUNSPEC_LDC:
    case VUNSPEC_LDCL:
    case VUNSPEC_STC:
    case VUNSPEC_STCL:
    case VUNSPEC_MCR:
    case VUNSPEC_MRC:
      if (arm_arch4)
	return true;
      break;
    case VUNSPEC_CDP2:
    case VUNSPEC_LDC2:
    case VUNSPEC_LDC2L:
    case VUNSPEC_STC2:
    case VUNSPEC_STC2L:
    case VUNSPEC_MCR2:
    case VUNSPEC_MRC2:
      /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch5t)
	return true;
      break;
    case VUNSPEC_MCRR:
    case VUNSPEC_MRRC:
      /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch6 || arm_arch5te)
	return true;
      break;
    case VUNSPEC_MCRR2:
    case VUNSPEC_MRRC2:
      if (arm_arch6)
	return true;
      break;
    default:
      gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
    case PLUS:
      {
	/* Or registers with an offset.  */
	if (!REG_P (XEXP (op, 0)))
	  return false;

	op = XEXP (op, 1);

	/* The offset must be an immediate though.  */
	if (!CONST_INT_P (op))
	  return false;

	range = INTVAL (op);

	/* Within the range of [-1020,1020].  */
	if (!IN_RANGE (range, -1020, 1020))
	  return false;

	/* And a multiple of 4.  */
	return (range % 4) == 0;
      }
    case PRE_INC:
    case POST_INC:
    case PRE_DEC:
    case POST_DEC:
      return REG_P (XEXP (op, 0));
    default:
      gcc_unreachable ();
    }
  return false;
}
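/* For example (illustrative): (mem (reg r0)),
   (mem (plus (reg r0) (const_int 8))) and (mem (post_inc (reg r0)))
   are all legitimate; an offset of 1022 fails the multiple-of-4 test
   and 2048 lies outside [-1020, 1020].  */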
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
arm_invalid_conversion (const_tree fromtype, const_tree totype)
{
  if (element_mode (fromtype) != element_mode (totype))
    {
      /* Do not allow conversions to/from BFmode scalar types.  */
      if (TYPE_MODE (fromtype) == BFmode)
	return N_("invalid conversion from type %<bfloat16_t%>");
      if (TYPE_MODE (totype) == BFmode)
	return N_("invalid conversion to type %<bfloat16_t%>");
    }

  /* Conversion allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
arm_invalid_unary_op (int op, const_tree type)
{
  /* Reject all single-operand operations on BFmode except for &.  */
  if (element_mode (type) == BFmode && op != ADDR_EXPR)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
		       const_tree type2)
{
  /* Reject all 2-operand operations on BFmode.  */
  if (element_mode (type1) == BFmode
      || element_mode (type2) == BFmode)
    return N_("operation not permitted on type %<bfloat16_t%>");

  /* Operation allowed.  */
  return NULL;
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */
static void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}
/* Have we recorded an explicit access to the Q bit of APSR?  */
bool
arm_q_bit_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle qbit",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}

/* Have we recorded an explicit access to the GE bits of PSTATE?  */
bool
arm_ge_bits_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle gebits",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}
/* Return NULL if insn INSN is valid within a low-overhead loop.
   Otherwise return why doloop cannot be applied.  */

static const char *
arm_invalid_within_doloop (const rtx_insn *insn)
{
  if (!TARGET_HAVE_LOB)
    return default_invalid_within_doloop (insn);

  if (CALL_P (insn))
    return "Function call in the loop.";

  if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
    return "LR is used inside loop.";

  return NULL;
}
bool
arm_target_insn_ok_for_lob (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  /* Make sure the basic block of the target insn is a simple latch
     whose single predecessor and successor are the loop body itself.
     Only simple loops with a single basic block as body are supported
     for low-overhead loops, making sure that the LE target is above
     LE itself in the generated code.  */

  return single_succ_p (bb)
    && single_pred_p (bb)
    && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
    && contains_no_active_insn_p (bb);
}
#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);

  /* Strip the feature bits of every FPU out of the group set; anything
     left over is a bit that no FPU defines.  */
  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
      bitmap_clear (isa_all_fpubits_internal);
      bitmap_copy (isa_all_fpubits_internal, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits_internal))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
	       " group that are not defined by any FPU.\n"
	       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
    }
}

static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */
/* Implement TARGET_STACK_PROTECT_GUARD.  If the guard is based on a
   global variable, use the default implementation; otherwise return
   NULL_TREE.  */
static tree
arm_stack_protect_guard (void)
{
  if (arm_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
   Unlike the arm version, we do NOT implement asm flag outputs.  */

rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
		      vec<machine_mode> & /*input_modes*/,
		      vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
		      HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    if (startswith (constraints[i], "=@cc"))
      {
	sorry ("%<asm%> flags not supported in thumb1 mode");
	break;
      }
  return NULL;
}
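/* For example (illustrative): an extended asm using a condition-code
   output such as

     asm ("subs %0, %2, #1" : "=r" (r), "=@cceq" (z) : "r" (x));

   is accepted by the arm version of this hook but rejected here,
   because the thumb1 variant does not implement asm flag outputs.  */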
/* Generate code to enable conditional branches in functions over 1 MiB.
   Parameters are:
     operands: the operands list of the asm insn (see arm_cond_branch or
       arm_cond_branch_reversed).
     pos_label: an index into the operands array where operands[pos_label]
       is the asm label of the final jump destination.
     dest: a string which is used to generate the asm label of the
       intermediate destination.
     branch_format: a string denoting the intermediate branch format, e.g.
       "beq", "bne", etc.  */

const char *
arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
		    const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = arm_strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);

  return "";
}
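/* For example (illustrative): a far "beq" can be synthesized by
   passing the inverted condition as BRANCH_FORMAT, giving a sequence
   along the lines of

	bne	.Lbcond4	@ short branch around the far jump
	b	<far_target>	@ unconditional branch has longer range
   .Lbcond4:

   where .Lbcond4 is built from DEST and the internal label number.  */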
/* If the given mode matches, loads from memory must use LO_REGS as the
   base register (i.e. [Rn], Rn <= LO_REGS).  */
enum reg_class
arm_mode_base_reg_class (machine_mode mode)
{
  if (TARGET_HAVE_MVE
      && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
    return LO_REGS;

  return MODE_BASE_REG_REG_CLASS (mode);
}
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
arm_get_mask_mode (machine_mode mode)
{
  if (TARGET_HAVE_MVE)
    return arm_mode_to_pred_mode (mode);

  return default_get_mask_mode (mode);
}

#include "gt-arm.h"